diff -urN linux-2.4.24-vanilla/CREDITS linux-2.4.24-ntfs-2.1.6a/CREDITS --- linux-2.4.24-vanilla/CREDITS 2003-11-28 18:26:19.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/CREDITS 2004-01-21 14:28:25.000000000 +0000 @@ -63,10 +63,9 @@ S: Belgium N: Anton Altaparmakov -E: aia21@cus.cam.ac.uk +E: aia21@cantab.net W: http://www-stu.christs.cam.ac.uk/~aia21/ -D: NTFS driver maintainer. NTFS fixes and cleanup. -D: Tiny fixes in linear md device and emu10k1 driver. +D: Author of new NTFS driver, various other kernel hacks. S: Christ's College S: Cambridge CB2 3BU S: United Kingdom diff -urN linux-2.4.24-vanilla/Documentation/Configure.help linux-2.4.24-ntfs-2.1.6a/Documentation/Configure.help --- linux-2.4.24-vanilla/Documentation/Configure.help 2003-11-28 18:26:19.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/Documentation/Configure.help 2004-01-21 14:28:25.000000000 +0000 @@ -16888,34 +16888,45 @@ NTFS file system support (read-only) CONFIG_NTFS_FS - NTFS is the file system of Microsoft Windows NT. Say Y if you want - to get read access to files on NTFS partitions of your hard drive. - The Linux NTFS driver supports most of the mount options of the VFAT - driver, see . Saying Y here - will give you read-only access to NTFS partitions. + NTFS is the file system of Microsoft Windows NT/2000/XP. For more + information see . Saying Y + here would allow you to read from NTFS partitions. - This code is also available as a module ( = code which can be + This file system is also available as a module ( = code which can be inserted in and removed from the running kernel whenever you want). The module will be called ntfs.o. If you want to compile it as a module, say M here and read . -NTFS write support (DANGEROUS) + If you are not using Windows NT/2000/XP in addition to Linux on your + computer it is safe to say N. + +CONFIG_NTFS_DEBUG + If you are experiencing any problems with the NTFS file system, say + Y here. 
This will result in additional consistency checks to be + performed by the driver as well as additional debugging messages to + be written to the system log. Note that debugging messages are + disabled by default. To enable them, supply the option debug_msgs=1 + at the kernel command line when booting the kernel or as an option + to insmod when loading the ntfs module. Once the driver is active, + you can enable debugging messages by doing (as root): + echo 1 > /proc/sys/fs/ntfs-debug + Replacing the "1" with "0" would disable debug messages. + + If you leave debugging messages disabled, this results in little + overhead, but enabling debug messages results in very significant + slowdown of the system. + + When reporting bugs, please try to have available a full dump of + debugging messages while the misbehaviour was occurring. + CONFIG_NTFS_RW - If you say Y here, you will (maybe) be able to write to NTFS file - systems as well as read from them. The read-write support in NTFS - is far from being complete and is not well tested. If you say Y - here, back up your NTFS volume first, since it will probably get - damaged. Also, download the Linux-NTFS project distribution from - Sourceforge at and always run the - included ntfsfix utility after writing to an NTFS partition from - Linux to fix some of the damage done by the driver. You should run - ntfsfix _after_ unmounting the partition in Linux but _before_ - rebooting into Windows. When Windows next boots, chkdsk will be - run automatically to fix the remaining damage. - Please note that write support is limited to Windows NT4 and - earlier versions. + This enables the experimental write support in the NTFS driver. - If unsure, say N. + WARNING: Do not use this option unless you are actively developing + NTFS as it is currently guaranteed to be broken and you + may lose all your data! + + It is strongly recommended and perfectly safe to say N here. 
System V/Xenix/V7/Coherent file system support CONFIG_SYSV_FS diff -urN linux-2.4.24-vanilla/Documentation/filesystems/ntfs.txt linux-2.4.24-ntfs-2.1.6a/Documentation/filesystems/ntfs.txt --- linux-2.4.24-vanilla/Documentation/filesystems/ntfs.txt 2001-12-21 17:41:53.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/Documentation/filesystems/ntfs.txt 2004-01-21 14:28:25.000000000 +0000 @@ -1,68 +1,115 @@ -NTFS Overview -============= +The Linux NTFS filesystem driver +================================ -Legato Systems, Inc. (http://www.legato.com) have sponsored Anton Altaparmakov -to develop NTFS on Linux since June 2001. -To mount an NTFS volume, use the filesystem type 'ntfs'. The driver -currently works only in read-only mode, with no fault-tolerance supported. +Table of contents +================= + +- Overview +- Supported mount options +- Features +- Known bugs and (mis-)features +- Using Software RAID with NTFS +- Limitations when using the MD driver +- ChangeLog + + +Overview +======== + +To mount an NTFS 1.2/3.x (Windows NT4/2000/XP) volume, use the filesystem +type 'ntfs'. The driver currently works only in read-only mode, with no +fault-tolerance or journalling supported. + +For fault tolerance and raid support (i.e. volume and stripe sets), you can use +the kernel's Software RAID / MD driver. See section "Using Software RAID with +NTFS" for details. -If you enable the dangerous(!) write support, make sure you can recover -from a complete loss of data. Also, download the Linux-NTFS project -distribution from Sourceforge at http://sourceforge.net/projects/linux-ntfs/ -and always run the included ntfsfix utility after performing a write to an -NTFS partition from Linux to fix some of the damage done by the Linux NTFS -driver and to schedule an automatic chkdsk when Windows reboots. You should -run ntfsfix _after_ unmounting the partition in Linux but _before_ rebooting -into Windows.
During the next reboot into Windows, chkdsk will be run -automatically fixing the remaining damage. If no errors are found it is a -good indication that the driver + ntfsfix together worked to full -satisfaction. (-; - -Please note that the experimental write support is limited to Windows NT4 and -earlier versions at the moment. - -If you think you have discovered a bug please have look at the "Known bugs" -section below to see whether it isn't known already. - -For ftdisk support, limited success was reported with volume sets on top of -the md driver, although mirror and stripe sets should work as well - if the -md driver can be talked into using the same layout as Windows NT. However, -using the md driver will fail if any of your NTFS partitions have an odd -number of sectors. Supported mount options ======================= -iocharset=name Character set to use when returning file names. +In addition to the generic mount options described by the manual page for the +mount command (man 8 mount, also see man 5 fstab), the NTFS driver supports the +following mount options: + +iocharset=name Deprecated option. Still supported but please use + nls=name in the future. See description for nls=name. + +nls=name Character set to use when returning file names. Unlike VFAT, NTFS suppresses names that contain unconvertible characters. Note that most character sets contain insufficient characters to represent all possible Unicode characters that can exist on NTFS. To be sure you are not missing any files, you are advised - to use the iocharset=utf8 which should be capable of - representing all Unicode characters. + to use nls=utf8 which is capable of representing all + Unicode characters. -utf8= Use UTF-8 for converting file names. - It is preferable - to use iocharset=utf8 instead, but if the utf8 NLS is - not available, you can use this utf8 option, which - enables the driver's builtin utf8 conversion functions. +utf8= Option no longer supported. 
Currently mapped to + nls=utf8 but please use nls=utf8 in the future and + make sure utf8 is compiled either as module or into + the kernel. See description for nls=name. uid= gid= -umask= These options work as documented in mount(8). - By default, the files are owned by root and - not readable by anyone else. - -posix= If enabled, the file system distinguishes between - upper and lower case. The 8.3 alias names are presented - as hard links instead of being suppressed. - -show_sys_files= If enabled, show all system files as normal files. Note - that $MFT does not appear unless specifically - requested. For example in bash, use: "ls -l \$MFT". - Be careful not to write anything to them or you could - crash the kernel and/or corrupt your file system! +umask= Provide default owner, group, and access mode mask. + These options work as documented in mount(8). By + default, the files/directories are owned by root and + he/she has read and write permissions, as well as + browse permission for directories. No one else has any + access permissions. I.e. the mode on all files is by + default rw------- and for directories rwx------, a + consequence of the default fmask=0177 and dmask=0077. + Using a umask of zero will grant all permissions to + everyone, i.e. all files and directories will have mode + rwxrwxrwx. + +fmask= +dmask= Instead of specifying umask which applies both to + files and directories, fmask applies only to files and + dmask only to directories. + +sloppy= If sloppy is specified, ignore unknown mount options. + Otherwise the default behaviour is to abort mount if + any unknown options are found. + +show_sys_files= If show_sys_files is specified, show the system files + in directory listings. Otherwise the default behaviour + is to hide the system files. + Note that even when show_sys_files is specified, "$MFT" + will not be visible due to bugs/mis-features in glibc. 
+ Further, note that irrespective of show_sys_files, all + files are accessible by name, i.e. you can always do + "ls -l \$UpCase" for example to specifically show the + system file containing the Unicode upcase table. + +case_sensitive= If case_sensitive is specified, treat all file names as + case sensitive and create file names in the POSIX + namespace. Otherwise the default behaviour is to treat + file names as case insensitive and to create file names + in the WIN32/LONG name space. Note, the Linux NTFS + driver will never create short file names and will + remove them on rename/delete of the corresponding long + file name. + Note that files remain accessible via their short file + name, if it exists. If case_sensitive, you will need to + provide the correct case of the short file name. + +errors=opt What to do when critical file system errors are found. + Following values can be used for "opt": + continue: DEFAULT, try to clean-up as much as + possible, e.g. marking a corrupt inode as + bad so it is no longer accessed, and then + continue. + recover: At present only supported is recovery of + the boot sector from the backup copy. If a + read-only mount, the recovery is done in + memory only and not written to disk. + Note that the options are additive, i.e. specifying: + errors=continue,errors=recover + This means the driver will attempt to recover and if + that fails it will clean-up as much as possible and + continue. mft_zone_multiplier= Set the MFT zone multiplier for the volume (this setting is not persistent across mounts and can be @@ -82,173 +129,261 @@ 2 25% 3 37.5% 4 50% + Note this option is irrelevant for read-only mounts. + + +Features +======== + +- This is a complete rewrite of the NTFS driver that used to be in the kernel. + This new driver implements NTFS read support and is functionally equivalent + to the old ntfs driver. +- The new driver has full support for sparse files on NTFS 3.x volumes which + the old driver isn't happy with. 
+- The new driver supports execution of binaries due to mmap() now being + supported. +- A comparison of the two drivers using: + time find . -type f -exec md5sum "{}" \; + run three times in sequence with each driver (after a reboot) on a 1.4GiB + NTFS partition, showed the new driver to be 20% faster in total time elapsed + (from 9:43 minutes on average down to 7:53). The time spent in user space + was unchanged but the time spent in the kernel was decreased by a factor of + 2.5 (from 85 CPU seconds down to 33). +- The driver does not support short file names in general. For backwards + compatibility, we implement access to files using their short file names if + they exist. The driver will not create short file names however, and a rename + will discard any existing short file name. + Known bugs and (mis-)features ============================= -- Do not use the driver for writing as it corrupts the file system. If you do - use it, get the Linux-NTFS tools and use the ntfsfix utility after - dismounting a partition you wrote to. +- The link count on each directory inode entry is set to 1, due to Linux not + supporting directory hard links. This may well confuse some user space + applications, since the directory names will have the same inode numbers. + This also speeds up ntfs_read_inode() immensely. And we haven't found any + problems with this approach so far. If you find a problem with this, please + let us know. -- Writing of extension records is not supported properly. -Please send bug reports/comments/feed back/abuse to the Linux-NTFS development +Please send bug reports/comments/feedback/abuse to the Linux-NTFS development list at sourceforge: linux-ntfs-dev@lists.sourceforge.net + +Using Software RAID with NTFS +============================= + +For support of volume and stripe sets, use the kernel's Software RAID / MD +driver and set up your /etc/raidtab appropriately (see man 5 raidtab). + +Linear volume sets, i.e. 
linear raid, as well as stripe sets, i.e. raid level 0, +have been tested and work fine (though see section "Limitations when using the +MD driver with NTFS volumes" especially if you want to use linear raid). Even +though untested, there is no reason why mirrors, i.e. raid level 1, and stripes +with parity, i.e. raid level 5, should not work, too. + +You have to use the "persistent-superblock 0" option for each raid-disk in the +NTFS volume/stripe you are configuring in /etc/raidtab as the persistent +superblock used by the MD driver would damage the NTFS volume. + +Windows by default uses a stripe chunk size of 64k, so you probably want the +"chunk-size 64k" option for each raid-disk, too. + +For example, if you have a stripe set consisting of two partitions /dev/hda5 +and /dev/hdb1 your /etc/raidtab would look like this: + +raiddev /dev/md0 + raid-level 0 + nr-raid-disks 2 + nr-spare-disks 0 + persistent-superblock 0 + chunk-size 64k + device /dev/hda5 + raid-disk 0 + device /dev/hdb1 + raid-disk 1 + +For linear raid, just change the raid-level above to "raid-level linear", for +mirrors, change it to "raid-level 1", and for stripe sets with parity, change +it to "raid-level 5". + +Note for stripe sets with parity you will also need to tell the MD driver which +parity algorithm to use by specifying the option "parity-algorithm which", +where you need to replace "which" with the name of the algorithm to use (see +man 5 raidtab for available algorithms) and you will have to try the different +available algorithms until you find one that works. Make sure you are working +read-only when playing with this as you may damage your data otherwise. If you +find which algorithm works please let us know (email the linux-ntfs developers +list linux-ntfs-dev@lists.sourceforge.net or drop in on IRC in channel #ntfs +on the irc.openprojects.net network) so we can update this documentation.
+ +Once the raidtab is set up, run for example raid0run -a to start all devices or +raid0run /dev/md0 to start a particular md device, in this case /dev/md0. + +Then just use the mount command as usual to mount the ntfs volume using for +example: mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume + +It is advisable to do the mount read-only to see if the md volume has been +set up correctly to avoid the possibility of causing damage to the data on the +ntfs volume. + + +Limitations when using the MD driver +===================================== + +Using the md driver will not work properly if any of your NTFS partitions have +an odd number of sectors. This is especially important for linear raid as all +data after the first partition with an odd number of sectors will be offset by +one or more sectors so if you mount such a partition with write support you +will cause massive damage to the data on the volume which will only become +apparent when you try to use the volume again under Windows. + +So when using linear raid, make sure that all your partitions have an even +number of sectors BEFORE attempting to use it. You have been warned! + + ChangeLog ========= -NTFS 1.1.21: - - Fixed bug with reading $MFT where we try to read higher mft records - before having read the $DATA attribute of $MFT. (Note this is only a - partial solution which will only work in the case that the attribute - list is resident or non-resident but $DATA is in the first 1024 - bytes. But this should be enough in the majority of cases. I am not - going to bother fixing the general case until someone finds this to - be a problem for them, which I doubt very much will ever happen...) - - Fixed bogus BUG() call in readdir(). - -NTFS 1.1.20: - - Fixed two bugs in ntfs_readwrite_attr(). Thanks to Jan Kara for - spotting the out of bounds one. - - Check return value of set_blocksize() in ntfs_read_super() and make - use of get_hardsect_size() to determine the minimum block size.
- - Fix return values of ntfs_vcn_to_lcn(). This should stop - peoples start of partition being overwritten at random. - -NTFS 1.1.19: - - Fixed ntfs_getdir_unsorted(), ntfs_readdir() and ntfs_printcb() to - cope with arbitrary cluster sizes. Very important for Win2k+. Also, - make them detect directories which are too large and truncate the - enumeration pretending end of directory was reached. Detect more - error conditions and overflows. All this fixes the problem where the - driver could end up in an infinite loop under certain circumstances. - - Fixed potential memory leaks in Unicode conversion functions and - setup correct NULL return values. - -NTFS 1.1.18: - - - Enhanced & bug fixed cluster deallocation (race fixes, etc.) - - Complete rewrite of cluster allocation, now race free. - - Fixed several bugs in the attribute modification codepaths. - - Hopefully fixed bug where the first sectors of some people's - partitions would be overwritten by the mft. And in general fixed up - mft extension code a bit (still incomplete though). - - Introduce splice_runlist() to allow generic splicing of two run - lists into one. - - MFT zone is now implemented. [Stage 2 of 3; only lack dynamic - growing of mft zone but that is AFAIK not even done by Windows, and - the overhead would be so large that it is probably not worth doing - at all, so Stage 3 might never happen...] - - Complete rewrite of $MFT extension and ntfs inode allocation code. - - Made the NTFS driver initialization string show the compile options - used (i.e. whether read-only or read-write, whether a module, and - whether with debug support). - - Modify ntfs_fill_mft_header() to set all fields and to accept more - arguments. - - Get rid of superfluous add_mft_header(). - - Get rid of some unused code. - - Fixed several bugs in and generally cleaned up ntfs_readdir, - ntfs_getdir_unsorted(), and ntfs_printcb. Now they spew out huge - amounts of debug output if debugging is enabled. 
This will be - removed once I know that this works for everyone. - - ntfs_readdir now shows hidden files. The only files that are now - hidden are the first 16 inodes (i.e. the hard coded system files), - which is consistent with Windows NT4. Using the show_sys_files mount - option, these files are then shown, too. - - Fixed the displaying of the "." and ".." directories. We still cannot - cope with more than 65536 files in a directory index block which is - not a problem and we now cannot cope with more than 32766 directory - index blocks which should not be a problem unless you have a - directory with an insanely large number of files in it. The exact - number depends on the length of the file names of the directory - entries and on the size of the dircetory index blocks. - - Fixed all problems with the last file in a directory (e.g. the last - file should no longer disappear and tab completion should work). If - there are still disappearing files or any other problems with the - last file in a directory, please report them! Thanks. - - Rewrote ntfs_extend_attr() to use the new cluster allocator and the - freshly introduced splice_runlists() function. This simplified - ntfs_extend_attr() a lot which in turn seems to have removed one or - more bugs from it. - - Probably other things I have forgotten... (-; - - Removed dollar signs from the names in the system file enumeration. - Apparently gcc doesn't support dollar signs on PPC architecture. - (Andrzej Krzysztofowicz) - -NTFS 1.1.17: - - - Fixed system file handling. No longer need to use show_sys_files - option for driver to work fine. System files are now always treated - the same, but without the option, they are made invisible to - directory listings. As a result system files can once again be opened - even without the show_sys_files option. This is important for the - statfs system call to work properly, for example. 
- - Implemented MFT zone including mount parameter to tune it (just like - in Windows via the registry, only we make it per mount rather than - global for the whole driver, so we are better but we have no way of - storing the value as we don't have a registry so either specify on - each mount or put it in /etc/fstab). [Stage 1 of 3, mount parameter - handling.] - - Fixed fixup functions to handle corruption cases and to return error - codes to the caller. - - Made fixup functions apply hotfixes where sensible. [Stage 1 of 2+, - in memory only.] - - Fixed ommission of "NTFS: " string in ntfs_error() output. - - Fixed stupid if statement bug in unistr.c. Thanks to Yann E. Morin - for spotting it. - - Get rid of all uses of max and min macros. This actually allowed for - optimizing the code in several places so it was a Good Thing(TM). - - Make ntfs use generic_file_open to enforce the O_LARGEFILE flag. - - Detect encrypted files and refuse to access them (return EACCES - error code to user space). - - Fix handling of encrypted & compressed files so that an encrypted - file no longer is considered to be compressed (this was causing - kernel segmentation faults). - -NTFS 1.1.16: - - - Removed non-functional uni_xlate mount options. - - Clarified the semantics of the utf8 and iocharset mount options. - - Threw out the non-functional mount options for using hard coded - character set conversion. Only kept utf8 one. - - Fixed handling of mount options and proper handling of faulty mount - options on remount. - - Cleaned up character conversion which basically became simplified a - lot due to the removal of the above mentioned mount options. - - Made character conversion to be always consistent. Previously we - could output to the VFS file names which we would then not accept - back from the VFS so in effect we were generating ghost entries in - the directory listings which could not be accessed by any means. 
- - Simplified time conversion functions drastically without sacrificing - accuracy. (-8 - - Fixed a use of a pointer before the check for the pointer being - NULL, reported by the Stanford checker. - - Fixed several missing error checks, reported by the Stanford - checker and fixed by Rasmus Andersen. - -NTFS 1.1.15 (changes since kernel 2.4.4's NTFS driver): - - - New mount option show_sys_files= to show all system files as - normal files. - - Support for files and in general any attributes up to the full 2TiB - size supported by the NTFS filesystem. Note we only support up to - 32-bits worth of inodes/clusters at this point. - - Support for more than 128kiB sized runlists (using vmalloc_32() - instead of kmalloc()). - - Fixed races in allocation of clusters and mft records. - - Fixed major bugs in attribute handling / searching / collation. - - Fixed major bugs in compressing a run list into a mapping pairs array. - - Fixed major bugs in inode allocation. Especially file create and - mkdir. - - Fixed memory leaks. - - Fixed major bug in inode layout assignment of sequence numbers. - - Lots of other bug fixes I can't think of right now... - - Fixed NULL bug found by the Stanford checker in ntfs_dupuni2map(). - - Convert large stack variable to dynamically allocated one in - ntfs_get_free_cluster_count() (found by Stanford checker). - -Kernel 2.4.4: +Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. +2.1.6a: + - Fix minor bug in handling of compressed directories that fixes the + erroneous "du" and "stat" output people reported. +2.1.5a: + - Minor bug fix in attribute list attribute handling that fixes the + I/O errors on "ls" of certain fragmented files found by at least two + people running Windows XP. +2.1.4a: + - Minor update allowing compilation with all gcc versions (well, the + ones the kernel can be compiled with anyway). 
+2.1.3a: + - Major bug fixes for reading files and volumes in corner cases which + were being hit by Windows 2k/XP users. +2.1.2a: + - Major bug fixes alleviating the hangs in statfs experienced by some + users. +2.1.1a: + - Major bug fix alleviating the random hangs experienced by some users. + - Update handling of compressed files so people no longer get the + frequently reported warning messages about initialized_size != + data_size. +2.1.0a: + - Sync up with ntfs 2.1.0. +2.1.0: + - Add configuration option for developmental write support. + - Initial implementation of file overwriting. (Writes to resident files + are not written out to disk yet, so avoid writing to files smaller + than about 1kiB.) + - Intercept/abort changes in file size as they are not implemented yet. +2.0.25a: + - Sync up with ntfs 2.0.25. +2.0.25: + - Minor bugfixes in error code paths and small cleanups. +2.0.24: + - Small internal cleanups. + - Support for sendfile system call. (Christoph Hellwig) +2.0.23(a): + - Massive internal locking changes to mft record locking. Fixes + various race conditions and deadlocks. + - Fix ntfs over loopback for compressed files by adding an + optimization barrier. (gcc was screwing up otherwise ?) + Thanks go to Christoph Hellwig for pointing these two out: + - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs(). + - Fix ntfs_free() for ia64 and parisc. +2.0.22a: + - Sync with NTFS 2.0.22 and kernel 2.4.19. +2.0.22: + - Small internal cleanups. +2.0.21a: + - Resync 2.4.18 kernel backport with latest 2.5.x kernel ntfs release. +2.0.21: + These only affect 32-bit architectures: + - Check for, and refuse to mount too large volumes (maximum is 2TiB). + - Check for, and refuse to open too large files and directories + (maximum is 16TiB). +2.0.20: + - Support non-resident directory index bitmaps. This means we now cope + with huge directories without problems. + - Fix a page leak that manifested itself in some cases when reading + directory contents.
+ - Internal cleanups. +2.0.19: + - Fix race condition and improvements in block i/o interface. + - Optimization when reading compressed files. +2.0.18: + - Fix race condition in reading of compressed files. +2.0.17: + - Cleanups and optimizations. +2.0.16: + - Fix stupid bug introduced in 2.0.15 in new attribute inode API. + - Big internal cleanup replacing the mftbmp access hacks by using the + new attribute inode API instead. +2.0.15: + - Bug fix in parsing of remount options. + - Internal changes implementing attribute (fake) inodes allowing all + attribute i/o to go via the page cache and to use all the normal + vfs/mm functionality. +2.0.14: + - Internal changes improving run list merging code and minor locking + change to not rely on BKL in ntfs_statfs(). +2.0.13: + - Internal changes towards using iget5_locked() in preparation for + fake inodes and small cleanups to ntfs_volume structure. +2.0.12a: + - Resync 2.4.18 backport with 2.0.12. +2.0.12: + - Internal cleanups in address space operations made possible by the + changes introduced in the previous release. +2.0.11: + - Internal updates and cleanups introducing the first step towards + fake inode based attribute i/o. +2.0.10: + - Microsoft says that the maximum number of inodes is 2^32 - 1. Update + the driver accordingly to only use 32-bits to store inode numbers on + 32-bit architectures. This improves the speed of the driver a little. +2.0.9: + - Change decompression engine to use a single buffer. This should not + affect performance except perhaps on the most heavy i/o on SMP + systems when accessing multiple compressed files from multiple + devices simultaneously. + - Minor updates and cleanups. +2.0.8: + - Remove now obsolete show_inodes and posix mount option(s). + - Restore show_sys_files mount option. + - Add new mount option case_sensitive, to determine if the driver + treats file names as case sensitive or not. 
+ - Mostly drop support for short file names (for backwards compatibility + we only support accessing files via their short file name if one + exists). + - Fix dcache aliasing issues wrt short/long file names. + - Cleanups and minor fixes. +2.0.7: + - Just cleanups. +2.0.6b: + - Remove relevant parts of the patch. +2.0.6a: + - Backport from 2.5.x to 2.4.18. +2.0.6: + - Major bugfix to make compatible with other kernel changes. This fixes + the hangs/oopses on umount. + - Locking cleanup in directory operations (remove BKL usage). +2.0.5: + - Major buffer overflow bug fix. + - Minor cleanups and updates for kernel 2.5.12. +2.0.4: + - Cleanups and updates for kernel 2.5.11. +2.0.3: + - Small bug fixes, cleanups, and performance improvements. +2.0.2: + - Use default fmask of 0177 so that files are not executable by default. + If you want owner executable files, just use fmask=0077. + - Update for kernel 2.5.9 but preserve backwards compatibility with + kernel 2.5.7. + - Minor bug fixes, cleanups, and updates. +2.0.1: + - Minor updates, primarily set the executable bit by default on files + so they can be executed. +2.0.0: - Started ChangeLog.
diff -urN linux-2.4.24-vanilla/MAINTAINERS linux-2.4.24-ntfs-2.1.6a/MAINTAINERS --- linux-2.4.24-vanilla/MAINTAINERS 2003-11-28 18:26:19.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/MAINTAINERS 2004-01-21 14:28:25.000000000 +0000 @@ -1320,9 +1320,10 @@ NTFS FILESYSTEM P: Anton Altaparmakov -M: aia21@cus.cam.ac.uk +M: aia21@cantab.net L: linux-ntfs-dev@lists.sourceforge.net L: linux-kernel@vger.kernel.org +W: http://linux-ntfs.sf.net/ S: Maintained NVIDIA (RIVA) FRAMEBUFFER DRIVER diff -urN linux-2.4.24-vanilla/arch/i386/defconfig linux-2.4.24-ntfs-2.1.6a/arch/i386/defconfig --- linux-2.4.24-vanilla/arch/i386/defconfig 2003-11-28 18:26:19.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/arch/i386/defconfig 2004-01-21 14:28:25.000000000 +0000 @@ -668,6 +668,7 @@ # CONFIG_MINIX_FS is not set # CONFIG_VXFS_FS is not set # CONFIG_NTFS_FS is not set +# CONFIG_NTFS_DEBUG is not set # CONFIG_NTFS_RW is not set # CONFIG_HPFS_FS is not set CONFIG_PROC_FS=y diff -urN linux-2.4.24-vanilla/fs/Config.in linux-2.4.24-ntfs-2.1.6a/fs/Config.in --- linux-2.4.24-vanilla/fs/Config.in 2003-11-28 18:26:21.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/Config.in 2004-01-21 14:28:25.000000000 +0000 @@ -65,7 +65,9 @@ tristate 'Minix fs support' CONFIG_MINIX_FS tristate 'FreeVxFS file system support (VERITAS VxFS(TM) compatible)' CONFIG_VXFS_FS + tristate 'NTFS file system support (read only)' CONFIG_NTFS_FS +dep_mbool ' NTFS debugging support' CONFIG_NTFS_DEBUG $CONFIG_NTFS_FS dep_mbool ' NTFS write support (DANGEROUS)' CONFIG_NTFS_RW $CONFIG_NTFS_FS $CONFIG_EXPERIMENTAL tristate 'OS/2 HPFS file system support' CONFIG_HPFS_FS diff -urN linux-2.4.24-vanilla/fs/ntfs/ChangeLog linux-2.4.24-ntfs-2.1.6a/fs/ntfs/ChangeLog --- linux-2.4.24-vanilla/fs/ntfs/ChangeLog 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/ChangeLog 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,864 @@ +ToDo: + - Find and fix bugs. + - Enable NFS exporting of NTFS. 
+ - Implement aops->set_page_dirty() in order to take control of buffer + dirtying. Not having it means if page_has_buffers(), all buffers + will be dirtied with the page. And if not they won't be. That is + fine for the moment but will break once we enable metadata updates. + - Implement sops->dirty_inode() to implement {a,m,c} time updates and + such things. + - Implement sops->write_inode(). + - In between ntfs_prepare/commit_write, need exclusion between + simultaneous file extensions. Need perhaps an NInoResizeUnderway() + flag which we can set in ntfs_prepare_write() and clear again in + ntfs_commit_write(). Just have to be careful in readpage/writepage, + as well as in truncate, that we play nice... We might need to have + a data_size field in the ntfs_inode to store the real attribute + length. Also need to be careful with initialized_size extension in + ntfs_prepare_write. Basically, just be _very_ careful in this code... + OTOH, perhaps i_sem, which is held across generic_file_write is + sufficient for synchronisation here. We then just need to make sure + ntfs_readpage/writepage/truncate interoperate properly with us. + +2.1.6a - Fix minor bug in handling of compressed directories. + + - Fix bug in handling of compressed directories. A compressed + directory is not really compressed so when we set the ->i_blocks + field of a compressed directory inode we were setting it from the + non-existing field ni->itype.compressed.size which gave random + results... For directories we now always use ni->allocated_size. + +2.1.5a - Fix minor bug in attribute list attribute handling. + + - Fix bug in attribute list handling. Actually it is not as much a bug + as too much protection in that we were not allowing attribute lists + which waste space on disk while Windows XP clearly allows it and in + fact creates such attribute lists so our driver was failing. + +2.1.4a - Reduce compiler requirements.
+ + - Remove all uses of unnamed structs and unions in the driver to make + old and newer gcc versions happy. Makes it a bit uglier IMO but at + least people will stop hassling me about it. + +2.1.3a - Important bug fixes in corner cases. + + - super.c::parse_ntfs_boot_sector(): Correct the check for 64-bit + clusters. (Philipp Thomas ) + +2.1.2a - Important bug fixes alleviating the hangs in statfs. + + - Fix buggy free cluster and free inode determination logic. + - Reduce function local stack usage from 0x3d4 bytes to just noise in + fs/ntfs/upcase.c. (Randy Dunlap ) + +2.1.1a - Important bug fix alleviating the random hangs. + + - Add handling for initialized_size != data_size in compressed files. + - Remove compiler warnings for newer gcc. + - Fix stupid logic inversion bug in ntfs inode lock handling. + +2.1.0a - Sync up with ntfs 2.1.0. + +2.1.0 - First steps towards write support: implement file overwrite. + + - Add configuration option for developmental write support with an + appropriately scary configuration help text. + - Initial implementation of fs/ntfs/aops.c::ntfs_writepage() and its + helper fs/ntfs/aops.c::ntfs_write_block(). This enables mmap(2) based + overwriting of existing files on ntfs. Note: Resident files are + only written into memory, and not written out to disk at present, so + avoid writing to files smaller than about 1kiB. + - Initial implementation of fs/ntfs/aops.c::ntfs_prepare_write(), its + helper fs/ntfs/aops.c::ntfs_prepare_nonresident_write() and their + counterparts, fs/ntfs/aops.c::ntfs_commit_write(), and + fs/ntfs/aops.c::ntfs_commit_nonresident_write(), respectively. Also, + add generic_file_write() to the ntfs file operations (fs/ntfs/file.c). + This enables write(2) based overwriting of existing files on ntfs. + Note: As with mmap(2) based overwriting, resident files are only + written into memory, and not written out to disk at present, so avoid + writing to files smaller than about 1kiB.
+ - Implement ->truncate (fs/ntfs/inode.c::ntfs_truncate()) and + ->setattr() (fs/ntfs/inode.c::ntfs_setattr()) inode operations for + files with the purpose of intercepting and aborting all i_size + changes which we do not support yet. ntfs_truncate() actually only + emits a warning message but AFAICS our interception of i_size changes + elsewhere means ntfs_truncate() never gets called for i_size changes. + It is only called from generic_file_write() when we fail in + ntfs_prepare_{,nonresident_}write() in order to discard any + instantiated buffers beyond i_size. Thus i_size is not actually + changed so our warning message is enough. Unfortunately it is not + possible to easily determine if i_size is being changed or not hence + we just emit an appropriately worded error message. + +2.0.25a - Sync up with ntfs 2.0.25. + +2.0.25 - Small bug fixes and cleanups. + + - Unlock the page in an out of memory error code path in + fs/ntfs/aops.c::ntfs_read_block(). + - If fs/ntfs/aops.c::ntfs_read_page() is called on an uptodate page, + just unlock the page and return. (This can happen due to ->writepage + clearing PageUptodate() during write out of MstProtected() + attributes.) + - Remove leaked write code again. + +2.0.24 - Cleanups. + + - Treat BUG_ON() as ASSERT() not VERIFY(), i.e. do not use side effects + inside BUG_ON(). (Adam J. Richter) + - Split logical OR expressions inside BUG_ON() into individual BUG_ON() + calls for improved debugging. (Adam J. Richter) + - Add errors flag to the ntfs volume state, accessed via + NVol{,Set,Clear}Errors(vol). + - Do not allow read-write remounts of read-only volumes with errors. + - Clarify comment for ntfs file operation sendfile which was added by + Christoph Hellwig a while ago (just using generic_file_sendfile()) + to say that ntfs ->sendfile is only used for the case where the + source data is on the ntfs partition and the destination is + somewhere else, i.e. nothing we need to concern ourselves with.
+ - Add generic_file_write() as our ntfs file write operation. + +2.0.23b - Minor updates. + + - Sync with 2.0.23. + - Don't export kmap_pte and kmap_prot as the global symbols. They + belong only to ppc, i386, mips and sparc architectures (found by + Jörg Prante). + - Don't use side-effects inside BUG_ON() and split logical OR + expressions inside BUG_ON() into two sequential BUG_ON() calls. + (Adam J. Richter) + +2.0.23a - Sync with 2.0.23. + +2.0.23 - Major bug fixes (races, deadlocks, non-i386 architectures). + + - Massive internal locking changes to mft record locking. Fixes lock + recursion and replaces the mrec_lock read/write semaphore with a + mutex. Also removes the now superfluous mft_count. This fixes several + race conditions and deadlocks, especially in the future write code. + - Fix ntfs over loopback for compressed files by adding an + optimization barrier. (gcc was screwing up otherwise ?) + - Miscellaneous cleanups all over the code and a fix or two in error + handling code paths. + Thanks go to Christoph Hellwig for pointing out the following two: + - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs(). + - Fix ntfs_free() for ia64 and parisc by checking for VMALLOC_END, too. + +2.0.22a - Updates of the backport. + + - Sync with NTFS 2.0.22. + - Update to 2.4.19 kernel. + +2.0.22 - Cleanups, mainly to ntfs_readdir(), and use C99 initializers. + + - Change fs/ntfs/dir.c::ntfs_readdir() to only read/write ->f_pos once + at entry/exit respectively. + - Use C99 initializers for structures. + - Remove unused variable blocks from fs/ntfs/aops.c::ntfs_read_block(). + +2.0.21a - Resync 2.4.18 kernel backport with latest 2.5.x kernel ntfs release. + +2.0.21 - Check for, and refuse to work with too large files/directories/volumes. + + - Limit volume size at mount time to 2TiB on architectures where + unsigned long is 32-bits (fs/ntfs/super.c::parse_ntfs_boot_sector()).
+ This is the most we can do without overflowing the 32-bit limit of + the block device size imposed on us by sb_bread() and sb_getblk() + for the time being. + - Limit file/directory size at open() time to 16TiB on architectures + where unsigned long is 32-bits (fs/ntfs/file.c::ntfs_file_open() and + fs/ntfs/dir.c::ntfs_dir_open()). This is the most we can do without + overflowing the page cache page index. + +2.0.20 - Support non-resident directory index bitmaps, fix page leak in readdir. + + - Move the directory index bitmap to use an attribute inode instead of + having special fields for it inside the ntfs inode structure. This + means that the index bitmaps now use the page cache for i/o, too, + and also as a side effect we get support for non-resident index + bitmaps for free. + - Simplify/cleanup error handling in fs/ntfs/dir.c::ntfs_readdir() and + fix a page leak that manifested itself in some cases. + - Add fs/ntfs/inode.c::ntfs_put_inode(), which we need to release the + index bitmap inode on the final iput(). + +2.0.19 - Fix race condition, improvements, and optimizations in i/o interface. + + - Apply block optimization added to fs/ntfs/aops.c::ntfs_read_block() + to fs/ntfs/compress.c::ntfs_file_read_compressed_block() as well. + - Drop the "file" from ntfs_file_read_compressed_block(). + - Rename fs/ntfs/aops.c::ntfs_end_buffer_read_async() to + ntfs_end_buffer_async_read() (more like the fs/buffer.c counterpart). + - Update ntfs_end_buffer_async_read() with the improved logic from + its updated counterpart fs/buffer.c::end_buffer_async_read(). Apply + further logic improvements to better determine when we set PageError. + - Update submission of buffers in fs/ntfs/aops.c::ntfs_read_block() to + check for the buffers being uptodate first in line with the updated + fs/buffer.c::block_read_full_page(). This plugs a small race + condition. + +2.0.18 - Fix race condition in reading of compressed files.
+ + - There was a narrow window between checking a buffer head for being + uptodate and locking it in ntfs_file_read_compressed_block(). We now + lock the buffer and then check whether it is uptodate or not. + +2.0.17 - Cleanups and optimizations - shrinking the ToDo list. + + - Modify fs/ntfs/inode.c::ntfs_read_locked_inode() to return an error + code and update callers, i.e. ntfs_iget(), to pass that error code + up instead of just using -EIO. + - Modifications to super.c to ensure that both mount and remount + cannot set any write related options when the driver is compiled + read-only. + - Optimize block resolution in fs/ntfs/aops.c::ntfs_read_block() to + cache the current run list element. This should improve performance + when reading very large and/or very fragmented data. + +2.0.16 - Convert access to $MFT/$BITMAP to attribute inode API. + + - Fix a stupid bug introduced in 2.0.15 where we were unmapping the + wrong inode in fs/ntfs/inode.c::ntfs_attr_iget(). + - Fix debugging check in fs/ntfs/aops.c::ntfs_read_block(). + - Convert $MFT/$BITMAP access to attribute inode API and remove all + remnants of the ugly mftbmp address space and operations hack. This + means we finally have only one readpage function as well as only one + async io completion handler. Yey! The mft bitmap is now just an + attribute inode and is accessed from vol->mftbmp_ino just as if it + were a normal file. Fake inodes rule. (-: + +2.0.15 - Fake inodes based attribute i/o via the pagecache, fixes and cleanups. + + - Fix silly bug in fs/ntfs/super.c::parse_options() which was causing + remounts to fail when the partition had an entry in /etc/fstab and + the entry specified the nls= option. + - Apply same macro magic used in fs/ntfs/inode.h to fs/ntfs/volume.h to + expand all the helper functions NVolFoo(), NVolSetFoo(), and + NVolClearFoo(). + - Move copyright statement from driver initialisation message to + module description (fs/super.c). 
This makes the initialisation + message fit on one line and fits in better with rest of kernel. + - Update fs/ntfs/attrib.c::map_run_list() to work on both real and + attribute inodes, and both for files and directories. + - Implement fake attribute inodes allowing all attribute i/o to go via + the page cache and to use all the normal vfs/mm functionality: + - Add ntfs_attr_iget() and its helper ntfs_read_locked_attr_inode() + to fs/ntfs/inode.c. + - Add needed cleanup code to ntfs_clear_big_inode(). + - Merge address space operations for files and directories (aops.c), + now just have ntfs_aops: + - Rename: + end_buffer_read_attr_async() -> ntfs_end_buffer_read_async(), + ntfs_attr_read_block() -> ntfs_read_block(), + ntfs_file_read_page() -> ntfs_readpage(). + - Rewrite fs/ntfs/aops.c::ntfs_readpage() to work on both real and + attribute inodes, and both for files and directories. + - Remove obsolete fs/ntfs/aops.c::ntfs_mst_readpage(). + +2.0.14 - Run list merging code cleanup, minor locking changes, typo fixes. + + - Change fs/ntfs/super.c::ntfs_statfs() to not rely on BKL by moving + the locking out of super.c::get_nr_free_mft_records() and taking and + dropping the mftbmp_lock rw_semaphore in ntfs_statfs() itself. + - Bring attribute run list merging code (fs/ntfs/attrib.c) in sync with + current userspace ntfs library code. This means that if a merge + fails the original run lists are always left unmodified instead of + being silently corrupted. + - Misc typo fixes. + +2.0.13 - Use iget5_locked() in preparation for fake inodes and small cleanups. + + - Remove nr_mft_bits and the now superfluous union with nr_mft_records + from ntfs_volume structure. + - Remove nr_lcn_bits and the now superfluous union with nr_clusters + from ntfs_volume structure. + - Use iget5_locked() and friends instead of conventional iget(). Wrap + the call in fs/ntfs/inode.c::ntfs_iget() and update callers of iget() + to use ntfs_iget(). 
Leave only one iget() call at mount time so we + don't need an ntfs_iget_mount(). + - Change fs/ntfs/inode.c::ntfs_new_extent_inode() to take mft_no as an + additional argument. + +2.0.12a - Resync 2.4.18 backport with 2.0.12. + +2.0.12 - Initial cleanup of address space operations following 2.0.11 changes. + + - Merge fs/ntfs/aops.c::end_buffer_read_mst_async() and + fs/ntfs/aops.c::end_buffer_read_file_async() into one function + fs/ntfs/aops.c::end_buffer_read_attr_async() using NInoMstProtected() + to determine whether to apply mst fixups or not. + - Above change allows merging fs/ntfs/aops.c::ntfs_file_read_block() + and fs/ntfs/aops.c::ntfs_mst_readpage() into one function + fs/ntfs/aops.c::ntfs_attr_read_block(). Also, create a tiny wrapper + fs/ntfs/aops.c::ntfs_mst_readpage() to transform the parameters from + the VFS readpage function prototype to the ntfs_attr_read_block() + function prototype. + +2.0.11 - Initial preparations for fake inode based attribute i/o. + + - Move definition of ntfs_inode_state_bits to fs/ntfs/inode.h and + do some macro magic (adapted from include/linux/buffer_head.h) to + expand all the helper functions NInoFoo(), NInoSetFoo(), and + NInoClearFoo(). + - Add new flag to ntfs_inode_state_bits: NI_Sparse. + - Add new fields to ntfs_inode structure to allow use of fake inodes + for attribute i/o: type, name, name_len. Also add new state bits: + NI_Attr, which, if set, indicates the inode is a fake inode, and + NI_MstProtected, which, if set, indicates the attribute uses multi + sector transfer protection, i.e. fixups need to be applied after + reads and before/after writes. + - Rename fs/ntfs/inode.c::ntfs_{new,clear,destroy}_inode() to + ntfs_{new,clear,destroy}_extent_inode() and update callers. + - Use ntfs_clear_extent_inode() in fs/ntfs/inode.c::__ntfs_clear_inode() + instead of ntfs_destroy_extent_inode(). + - Cleanup memory deallocations in {__,}ntfs_clear_{,big_}inode(). 
+ - Make all operations on ntfs inode state bits use the NIno* functions. + - Set up the new ntfs inode fields and state bits in + fs/ntfs/inode.c::ntfs_read_inode() and add appropriate cleanup of + allocated memory to __ntfs_clear_inode(). + - Cleanup ntfs_inode structure a bit for better ordering of elements + w.r.t. their size to allow better packing of the structure in memory. + +2.0.10 - There can only be 2^32 - 1 inodes on an NTFS volume. + + - Add check at mount time to verify that the number of inodes on the + volume does not exceed 2^32 - 1, which is the maximum allowed for + NTFS according to Microsoft. + - Change mft_no member of ntfs_inode structure to be unsigned long. + Update all users. This makes ntfs_inode->mft_no just a copy of struct + inode->i_ino. But we can't just always use struct inode->i_ino and + remove mft_no because extent inodes do not have an attached struct + inode. + +2.0.9 - Decompression engine now uses a single buffer and other cleanups. + + - Change decompression engine to use a single buffer protected by a + spin lock instead of per-CPU buffers. (Rusty Russell) + - Do not update cb_pos when handling a partial final page during + decompression of a sparse compression block, as the value is later + reset without being read/used. (Rusty Russell) + - Switch to using the new KM_BIO_SRC_IRQ for atomic kmap()s. (Andrew + Morton) + - Change buffer size in ntfs_readdir()/ntfs_filldir() to use + NLS_MAX_CHARSET_SIZE which makes the buffers almost 1kiB each but + it also makes everything safer so it is a good thing. + - Miscellaneous minor cleanups to comments. + +2.0.8 - Major updates for handling of case sensitivity and dcache aliasing. + + Big thanks go to Al Viro and other inhabitants of #kernel for investing + their time to discuss the case sensitivity and dcache aliasing issues. + + - Remove unused source file fs/ntfs/attraops.c. + - Remove show_inodes mount option(s), thus dropping support for + displaying of short file names. 
+ - Remove deprecated mount option posix. + - Restore show_sys_files mount option. + - Add new mount option case_sensitive, to determine if the driver + treats file names as case sensitive or not. If case sensitive, create + file names in the POSIX namespace. Otherwise create file names in the + LONG/WIN32 namespace. Note, files remain accessible via their short + file name, if it exists. + - Remove really dumb logic bug in boot sector recovery code. + - Fix dcache aliasing issues wrt short/long file names via changes + to fs/ntfs/dir.c::ntfs_lookup_inode_by_name() and + fs/ntfs/namei.c::ntfs_lookup(): + - Add additional argument to ntfs_lookup_inode_by_name() in which we + return information about the matching file name if the case is not + matching or the match is a short file name. See comments above the + function definition for details. + - Change ntfs_lookup() to only create dcache entries for the correctly + cased file name and only for the WIN32 namespace counterpart of DOS + namespace file names. This ensures we have only one dentry per + directory and also removes all dcache aliasing issues between short + and long file names once we add write support. See comments above + function for details. + - Fix potential 1 byte overflow in fs/ntfs/unistr.c::ntfs_ucstonls(). + +2.0.7d - Minor bugfixes including PPC compilation fixes and some cleanups. + + - The previous version didn't handle the case when the NTFS was + compiled as the module and CONFIG_HIGHMEM was on. Missing exports + for kmap_pte and kmap_prot were added. + - Fix the compile problem for module compilation for PPC arch + - Remove not anymore used file (attraops.c) + +2.0.7c - Minor bugfixes. + + - Define KM_BIO_IRQ in km_type to let NTFS-TNG compile with + CONFIG_HIGHMEM enabled (reported by Arek Miskiewicz) + +2.0.7b - Minor updates from the NTFS-TNG main line. + + - Use preempt_enable() and preempt_disable() to let NTFS work with + the preemptible kernel patch by Robert Love. 
+ +2.0.7a - Sync with 2.0.7. + +2.0.7 - Minor cleanups and updates for changes in core kernel code. + + - Remove much of the NULL struct element initializers. + - Various updates to make compatible with recent kernels. + - Remove defines of MAX_BUF_PER_PAGE and include linux/buffer_head.h + in fs/ntfs/ntfs.h instead. + - Remove no longer needed KERNEL_VERSION checks. We are now in the + kernel proper so they are no longer needed. + +2.0.6b - Remove relevant parts from the patch. + +2.0.6a - Initial backport of the driver to the 2.4.18 kernel. + + Backported all main features of the driver. It runs stable at my + box - it survived the crash test :-)). All 2.5.x specific changes + were left over (eg. dropping BKL, changes in mftbmp initialization, + dropping setting old blocksize in the error path). + +2.0.6 - Major bugfix to make compatible with other kernel changes. + + - Initialize the mftbmp address space properly now that there are more + fields in the struct address_space. This was leading to hangs and + oopses on umount since 2.5.12 because of changes to other parts of + the kernel. We probably want a kernel generic init_address_space() + function... + - Drop BKL from ntfs_readdir() after consultation with Al Viro. The + only caller of ->readdir() is vfs_readdir() which holds i_sem during + the call, and i_sem is sufficient protection against changes in the + directory inode (including ->i_size). + - Use generic_file_llseek() for directories (as opposed to + default_llseek()) as this downs i_sem instead of the BKL which is + what we now need for exclusion against ->f_pos changes considering we + no longer take the BKL in ntfs_readdir(). + +2.0.5 - Major bugfix. Buffer overflow in extent inode handling. + + - No need to set old blocksize in super.c::ntfs_fill_super() as the + VFS does so via invocation of deactivate_super() calling + fs->fill_super() calling block_kill_super() which does it. + - BKL moved from VFS into dir.c::ntfs_readdir().
(Linus Torvalds) + -> Do we really need it? I don't think so as we have exclusion on + the directory ntfs_inode rw_semaphore mrec_lock. We might have to + move the ->f_pos accesses under the mrec_lock though. Check this... + - Fix really, really, really stupid buffer overflow in extent inode + handling in mft.c::map_extent_mft_record(). + +2.0.4 - Cleanups and updates for kernel 2.5.11. + + - Add documentation on how to use the MD driver to be able to use NTFS + stripe and volume sets in Linux and generally cleanup documentation + a bit. + Remove all uses of kdev_t in favour of struct block_device *: + - Change compress.c::ntfs_file_read_compressed_block() to use + sb_getblk() instead of getblk(). + - Change super.c::ntfs_fill_super() to use bdev_hardsect_size() instead + of get_hardsect_size(). + - No need to get old blocksize in super.c::ntfs_fill_super() as + fs/super.c::get_sb_bdev() already does this. + - Set bh->b_bdev instead of bh->b_dev throughout aops.c. + +2.0.3 - Small bug fixes, cleanups, and performance improvements. + + - Remove some dead code from mft.c. + - Optimize readpage and read_block functions throughout aops.c so that + only initialized blocks are read. Non-initialized ones have their + buffer head mapped, zeroed, and set up to date, without scheduling + any i/o. Thanks to Al Viro for advice on how to avoid the device i/o. + Thanks go to Andrew Morton for spotting the below: + - Fix buglet in allocate_compression_buffers() error code path. + - Call flush_dcache_page() after modifying page cache page contents in + ntfs_file_readpage(). + - Check for existence of page buffers throughout aops.c before calling + create_empty_buffers(). This happens when an I/O error occurs and the + read is retried. (It also happens once writing is implemented so that + needed doing anyway but I had left it for later...) + - Don't BUG_ON() uptodate and/or mapped buffers throughout aops.c in + readpage and read_block functions. Reasoning same as above (i.e.
I/O + error retries and future write code paths.) + +2.0.2 - Minor updates and cleanups. + + - Cleanup: rename mst.c::__post_read_mst_fixup to post_write_mst_fixup + and cleanup the code a bit, removing the unused size parameter. + - Change default fmask to 0177 and update documentation. + - Change attrib.c::get_attr_search_ctx() to return the search context + directly instead of taking the address of a pointer. A return value + of NULL means the allocation failed. Updated all callers + appropriately. + - Update to 2.5.9 kernel (preserving backwards compatibility) by + replacing all occurrences of page->buffers with page_buffers(page). + - Fix minor bugs in run list merging, also minor cleanup. + - Updates to bootsector layout and mft mirror contents descriptions. + - Small bug fix in error detection in unistr.c and some cleanups. + - Grow name buffer allocations in unistr.c in aligned multiples of 64 + bytes. + +2.0.1 - Minor updates. + + - Make default umask correspond to documentation. + - Improve documentation. + - Set default mode to include execute bit. The {u,f,d}mask can be used + to take it away if desired. This allows binaries to be executed from + a mounted ntfs partition. + +2.0.0 - New version number. Remove TNG from the name. Now in the kernel. + + - Add kill_super, just keeping up with the vfs changes in the kernel. + - Repeat some changes from tng-0.0.8 that somehow got lost on the way + from the CVS import into BitKeeper. + - Begin to implement proper handling of allocated_size vs + initialized_size vs data_size (i.e. i_size). Done are + mft.c::ntfs_mft_readpage(), aops.c::end_buffer_read_index_async(), + and attrib.c::load_attribute_list(). + - Lock the run list in attrib.c::load_attribute_list() while using it. + - Fix memory leak in ntfs_file_read_compressed_block() and generally + clean up compress.c a little, removing some uncommented/unused debug + code. + - Tidy up dir.c a little bit.
+ - Don't bother getting the run list in inode.c::ntfs_read_inode(). + - Merge mft.c::ntfs_mft_readpage() and aops.c::ntfs_index_readpage() + creating aops.c::ntfs_mst_readpage(), improving the handling of + holes and overflow in the process and implementing the correct + equivalent of ntfs_file_get_block() in ntfs_mst_readpage() itself. + I am aiming for correctness at the moment. Modularisation can come + later. + - Rename aops.c::end_buffer_read_index_async() to + end_buffer_read_mst_async() and optimize the overflow checking and + handling. + - Use the host of the mftbmp address space mapping to hold the ntfs + volume. This is needed so the async i/o completion handler can + retrieve a pointer to the volume. Hopefully this will not cause + problems elsewhere in the kernel... Otherwise will need to use a + fake inode. + - Complete implementation of proper handling of allocated_size vs + initialized_size vs data_size (i.e. i_size) in whole driver. + Basically aops.c is now completely rewritten. + - Change NTFS driver name to just NTFS and set version number to 2.0.0 + to make a clear distinction from the old driver which is still on + version 1.1.22. + +tng-0.0.8 - 08/03/2002 - Now using BitKeeper, http://linux-ntfs.bkbits.net/ + + - Replace bdevname(sb->s_dev) with sb->s_id. + - Remove now superfluous new-line characters in all callers of + ntfs_debug(). + - Apply kludge in ntfs_read_inode(), setting i_nlink to 1 for + directories. Without this the "find" utility gets very upset which is + fair enough as Linux/Unix do not support directory hard links. + - Further run list merging work. (Richard Russon) + - Backwards compatibility for gcc-2.95. (Richard Russon) + - Update to kernel 2.5.5-pre1 and rediff the now tiny patch. + - Convert to new file system declaration using ->ntfs_get_sb() and + replacing ntfs_read_super() with ntfs_fill_super(). + - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index + overflow on 32-bit architectures. 
+ - Cleanup upcase loading code to use ntfs_(un)map_page(). + - Disable/reenable preemption in critical sections of compression engine. + - Replace device size determination in ntfs_fill_super() with + sb->s_bdev->bd_inode->i_size (in bytes) and remove now superfluous + function super.c::get_nr_blocks(). + - Implement a mount time option (show_inodes) allowing choice of which + types of inode names readdir() returns and modify ntfs_filldir() + accordingly. There are several parameters to show_inodes: + system: system files + win32: long file names (including POSIX file names) [DEFAULT] + long: same as win32 + dos: short file names only (excluding POSIX file names) + short: same as dos + posix: same as both win32 and dos + all: all file names + Note that the options are additive, i.e. specifying: + -o show_inodes=system,show_inodes=win32,show_inodes=dos + is the same as specifying: + -o show_inodes=all + Note that the "posix" and "all" options will show all directory + names, BUT the link count on each directory inode entry is set to 1, + due to Linux not supporting directory hard links. This may well + confuse some userspace applications, since the directory names will + have the same inode numbers. Thus it is NOT advisable to use the + "posix" or "all" options. We provide them only for completeness sake. + - Add copies of allocated_size, initialized_size, and compressed_size to + the ntfs inode structure and set them up in + inode.c::ntfs_read_inode(). These reflect the unnamed data attribute + for files and the index allocation attribute for directories. + - Add copies of allocated_size and initialized_size to ntfs inode for + $BITMAP attribute of large directories and set them up in + inode.c::ntfs_read_inode(). + - Add copies of allocated_size and initialized_size to ntfs volume for + $BITMAP attribute of $MFT and set them up in + super.c::load_system_files().
+ - Parse deprecated ntfs driver options (iocharset, show_sys_files, + posix, and utf8) and tell user what the new options to use are. Note + we still do support them but they will be removed with kernel 2.7.x. + - Change all occurrences of integer long long printf formatting to hex + as printk() will not support long long integer format if/when the + div64 patch goes into the kernel. + - Make slab caches have stable names and change the names to what they + were intended to be. These changes are required/made possible by the + new slab cache name handling which removes the length limitation by + requiring the caller of kmem_cache_create() to supply a stable name + which is then referenced but not copied. + - Rename run_list structure to run_list_element and create a new + run_list structure containing a pointer to a run_list_element + structure and a read/write semaphore. Adapt all users of run lists + to new scheme and take and release the lock as needed. This fixes a + nasty race as the run_list changes even when inodes are locked for + reading and even when the inode isn't locked at all, so we really + needed the serialization. We use a semaphore rather than a spinlock + as memory allocations can sleep and doing everything GFP_ATOMIC + would be silly. + - Cleanup read_inode() removing all code checking for lowest_vcn != 0. + This can never happen due to the nature of lookup_attr() and how we + support attribute lists. If it did happen it would imply the inode + being corrupt. + - Check for lowest_vcn != 0 in ntfs_read_inode() and mark the inode as + bad if found. + - Update to 2.5.6-pre2 changes in struct address_space. + - Use parent_ino() when accessing d_parent inode number in dir.c. + - Import Sourceforge CVS repository into BitKeeper repository: + http://linux-ntfs.bkbits.net/ntfs-tng-2.5 + - Update fs/Makefile, fs/Config.help, fs/Config.in, and + Documentation/filesystems/ntfs.txt for NTFS TNG.
+ - Create kernel configuration option controlling whether debugging + is enabled or not. + - Add the required export of end_buffer_io_sync() from the patches + directory to the kernel code. + - Update inode.c::ntfs_show_options() with show_inodes mount option. + - Update errors mount option. + +tng-0.0.7 - 13/02/2002 - The driver is now feature complete for read-only! + + - Cleanup mft.c and its debug/error output in particular. Fix a minor + bug in mapping of extent inodes. Update all the comments to fit all + the recent code changes. + - Modify vcn_to_lcn() to cope with entirely unmapped run lists. + - Cleanups in compress.c, mostly comments and folding help. + - Implement attrib.c::map_run_list() as a generic helper. + - Make compress.c::ntfs_file_read_compressed_block() use map_run_list() + thus making code shorter and enabling attribute list support. + - Cleanup incorrect use of [su]64 with %L printf format specifier in + all source files. Type casts to [unsigned] long long added to correct + the mismatches (important for architectures which have long long not + being 64 bits). + - Merge async io completion handlers for directory indexes and $MFT + data into one by setting the index_block_size{_bits} of the ntfs + inode for $MFT to the mft_record_size{_bits} of the ntfs_volume. + - Cleanup aops.c, update comments. + - Make ntfs_file_get_block() use map_run_list() so all files now + support attribute lists. + - Make ntfs_dir_readpage() almost verbatim copy of + block_read_full_page() by using ntfs_file_get_block() with only real + difference being the use of our own async io completion handler + rather than the default one, thus reducing the amount of code and + automatically enabling attribute list support for directory indices. + - Fix bug in load_attribute_list() - forgot to call brelse in error + code path. + - Change parameters to find_attr() and lookup_attr(). We no longer + pass in the upcase table and its length.
These can be gotten from + ctx->ntfs_ino->vol->upcase{_len}. Update all callers. + - Cleanups in attrib.c. + - Implement merging of run lists, attrib.c::merge_run_lists() and its + helpers. (Richard Russon) + - Attribute lists part 2, attribute extents and multi part run lists: + enable proper support for LCN_RL_NOT_MAPPED and automatic mapping of + further run list parts via attrib.c::map_run_list(). + - Tiny endianness bug fix in decompress_mapping_pairs(). + +tng-0.0.6 - Encrypted directories, bug fixes, cleanups, debugging enhancements. + + - Enable encrypted directories. (Their index root is marked encrypted + to indicate that new files in that directory should be created + encrypted.) + - Fix bug in NInoBmpNonResident() macro. (Cut and paste error.) + - Enable $Extend system directory. Most (if not all) extended system + files do not have unnamed data attributes so ntfs_read_inode() had to + special case them but that is ok, as the special casing recovery + happens inside an error code path so there is zero slow down in the + normal fast path. The special casing is done by introducing a new + function inode.c::ntfs_is_extended_system_file() which checks if any + of the hard links in the inode point to $Extend as being their parent + directory and if they do we assume this is an extended system file. + - Create a sysctl/proc interface to allow {dis,en}abling of debug output + when compiled with -DDEBUG. Default is debug messages to be disabled. + To enable them, one writes a non-zero value to /proc/sys/fs/ntfs-debug + (if /proc is enabled) or uses sysctl(2) to effect the same (if sysctl + interface is enabled). Inspired by old ntfs driver. + - Add debug_msgs insmod/kernel boot parameter to set whether debug + messages are {dis,en}abled. This is useful to enable debug messages + during ntfs initialization and is the only way to activate debugging + when the sysctl interface is not enabled. + - Cleanup debug output in various places. 
+ - Remove all dollar signs ($) from the source (except comments) to + enable compilation on architectures whose gcc compiler does not + support dollar signs in the names of variables/constants. Attribute + types now start with AT_ instead of $ and $I30 is now just I30. + - Cleanup ntfs_lookup() and add consistency check of sequence numbers. + - Load complete run list for $MFT/$BITMAP during mount and cleanup + access functions. This means we now cope with $MFT/$BITMAP being + spread across several mft records. + - Disable modification of mft_zone_multiplier on remount. We can always + reenable this later on if we really want to, but we will need to make + sure we readjust the mft_zone size / layout accordingly. + +tng-0.0.5 - Modernize for 2.5.x and further in line-ing with Al Viro's comments. + + - Use sb_set_blocksize() instead of set_blocksize() and verify the + return value. + - Use sb_bread() instead of bread() throughout. + - Add index_vcn_size{_bits} to ntfs_inode structure to store the size + of a directory index block vcn. Apply resulting simplifications in + dir.c everywhere. + - Fix a small bug somewhere (but forgot what it was). + - Change ntfs_{debug,error,warning} to enable gcc to do type checking + on the printf-format parameter list and fix bugs reported by gcc + as a result. (Richard Russon) + - Move inode allocation strategy to Al's new stuff but maintain the + divorce of ntfs_inode from struct inode. To achieve this we have two + separate slab caches, one for big ntfs inodes containing a struct + inode and pure ntfs inodes and at the same time fix some faulty + error code paths in ntfs_read_inode(). + - Show mount options in proc (inode.c::ntfs_show_options()). + +tng-0.0.4 - Big changes, getting in line with Al Viro's comments. + + - Modified (un)map_mft_record functions to be common for read and write + case. To specify which is which, added extra parameter at front of + parameter list.
Pass either READ or WRITE to this, each has the + obvious meaning. + - General cleanups to allow for easier folding in vi. + - attrib.c::decompress_mapping_pairs() now accepts the old run list + argument, and invokes attrib.c::merge_run_lists() to merge the old + and the new run lists. + - Removed attrib.c::find_first_attr(). + - Implemented loading of attribute list and complete run list for $MFT. + This means we now cope with $MFT being spread across several mft + records. + - Adapt to 2.5.2-pre9 and the changed create_empty_buffers() syntax. + - Adapt major/minor/kdev_t/[bk]devname stuff to new 2.5.x kernels. + - Make ntfs_volume be allocated via kmalloc() instead of using a slab + cache. There are too few ntfs_volume structures at any one time + to justify a private slab cache. + - Fix bogus kmap() use in async io completion. Now use kmap_atomic(). + Use KM_BIO_IRQ on advice from IRC/kernel... + - Use ntfs_map_page() in map_mft_record() and create ->readpage method + for reading $MFT (ntfs_mft_readpage). In the process create dedicated + address space operations (ntfs_mft_aops) for $MFT inode mapping. Also + removed the now superfluous exports from the kernel core patch. + - Fix a bug where kfree() was used instead of ntfs_free(). + - Change map_mft_record() to take ntfs_inode as argument instead of + vfs inode. Ditto for unmap_mft_record(). Adapt all callers. + - Add pointer to ntfs_volume to ntfs_inode. + - Add mft record number and sequence number to ntfs_inode. Stop using + i_ino and i_generation for in-driver purposes. + - Implement attrib.c::merge_run_lists(). (Richard Russon) + - Remove use of proper inodes by extent inodes. Move i_ino and + i_generation to ntfs_inode to do this. Apply simplifications that + result and remove iget_no_wait(), etc. + - Pass ntfs_inode everywhere in the driver (used to be struct inode). + - Add reference counting in ntfs_inode for the ntfs inode itself and + for the mapped mft record.
+ - Extend mft record mapping so we can (un)map extent mft records (new + functions (un)map_extent_mft_record), and so mappings are reference + counted and don't have to happen twice if already mapped - just ref + count increases. + - Add -o iocharset as alias to -o nls for backwards compatibility. + - The latest core patch is now tiny. In fact just a single additional + export is necessary over the base kernel. + +tng-0.0.3 - Cleanups, enhancements, bug fixes. + + - Work on attrib.c::decompress_mapping_pairs() to detect base extents + and setup the run list appropriately using knowledge provided by the + sizes in the base attribute record. + - Balance the get_/put_attr_search_ctx() calls so we don't leak memory + any more. + - Introduce ntfs_malloc_nofs() and ntfs_free() to allocate/free a single + page or use vmalloc depending on the amount of memory requested. + - Cleanup error output. The __FUNCTION__ "(): " is now added + automatically. Introduced a new header file debug.h to support this + and also moved ntfs_debug() function into it. + - Make reading of compressed files more intelligent and especially get + rid of the vmalloc_nofs() from readpage(). This now uses per CPU + buffers (allocated at first mount with cluster size <= 4kiB and + deallocated on last umount with cluster size <= 4kiB), and + asynchronous io for the compressed data using a list of buffer heads. + Er, we use synchronous io as async io only works on whole pages + covered by buffers and not on individual buffer heads... + - Bug fix for reading compressed files with sparse compression blocks. + +tng-0.0.2 - Now handles larger/fragmented/compressed volumes/files/dirs. + + - Fixed handling of directories when cluster size exceeds index block + size. + - Hide DOS only name space directory entries from readdir() but allow + them in lookup(). This should fix the problem that Linux doesn't + support directory hard links, while still allowing access to entries + via their short file name. 
This also has the benefit of mimicking + what Windows users are used to, so it is the ideal solution. + - Implemented sync_page everywhere so no more hangs in D state when + waiting for a page. + - Stop using bforget() in favour of brelse(). + - Stop locking buffers unnecessarily. + - Implemented compressed files (inode->mapping contains uncompressed + data, raw compressed data is currently bread() into a vmalloc()ed + memory buffer). + - Enable compressed directories. (Their index root is marked compressed + to indicate that new files in that directory should be created + compressed.) + - Use vsnprintf rather than vsprintf in the ntfs_error and ntfs_warning + functions. (Thanks to Will Dyson for pointing this out.) + - Moved the ntfs_inode and ntfs_volume (the former ntfs_inode_info and + ntfs_sb_info) out of the common inode and super_block structures and + started using the generic_ip and generic_sbp pointers instead. This + makes ntfs entirely private with respect to the kernel tree. + - Detect compiler version and abort with error message if gcc less than + 2.96 is used. + - Fix bug in name comparison function in unistr.c. + - Implement attribute lists part 1, the infrastructure: search contexts + and operations, find_external_attr(), lookup_attr()) and make the + code use the infrastructure. + - Fix stupid buffer overflow bug that became apparent on larger run + list containing attributes. + - Fix bugs in readdir() that became apparent on larger directories. + + The driver is now really useful and survives the test + find . -type f -exec md5sum "{}" \; + without any error messages on a over 1GiB sized partition with >16k + files on it, including compressed files and directories and many files + and directories with attribute lists. + +tng-0.0.1 - The first useful version. + + - Added ntfs_lookup(). + - Added default upcase generation and handling. + - Added compile options to be shown on module init. + - Many bug fixes that were "hidden" before. 
+ - Update to latest kernel. + - Added ntfs_readdir(). + - Added file operations for mmap(), read(), open() and llseek(). We just + use the generic ones. The whole point of going through implementing + readpage() methods and where possible get_block() call backs is that + this allows us to make use of the generic high level methods provided + by the kernel. + + The driver is now actually useful! Yey. (-: It undoubtedly has got bugs + though and it doesn't implement accessing compressed files yet. Also, + accessing files with attribute list attributes is not implemented yet + either. But for small or simple file systems it should work and allow + you to list directories, use stat on directory entries and the file + system, open, read, mmap and llseek around in files. A big mile stone + has been reached! + +tng-0.0.0 - Initial version tag. + + Initial driver implementation. The driver can mount and umount simple + NTFS file systems (i.e. ones without attribute lists in the system + files). If the mount fails there might be problems in the error handling + code paths, so be warned. Otherwise it seems to be loading the system + files nicely and the mft record read mapping/unmapping seems to be + working nicely, too. Proof of inode metadata in the page cache and non- + resident file unnamed stream data in the page cache concepts is thus + complete. + diff -urN linux-2.4.24-vanilla/fs/ntfs/Makefile linux-2.4.24-ntfs-2.1.6a/fs/ntfs/Makefile --- linux-2.4.24-vanilla/fs/ntfs/Makefile 2002-02-25 19:38:09.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/Makefile 2004-01-21 14:28:25.000000000 +0000 @@ -1,11 +1,21 @@ -# Rules for making the NTFS driver +# Rules for making the NTFS driver.
O_TARGET := ntfs.o -obj-y := fs.o sysctl.o support.o util.o inode.o dir.o super.o attr.o unistr.o +obj-y := aops.o attrib.o compress.o debug.o dir.o file.o inode.o mft.o \ + mst.o namei.o super.o sysctl.o time.o unistr.o upcase.o + obj-m := $(O_TARGET) -# New version format started 3 February 2001. -EXTRA_CFLAGS = -DNTFS_VERSION=\"1.1.22\" #-DDEBUG + +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.6a\" + +ifeq ($(CONFIG_NTFS_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif + +ifeq ($(CONFIG_NTFS_RW),y) +EXTRA_CFLAGS += -DNTFS_RW +endif include $(TOPDIR)/Rules.make diff -urN linux-2.4.24-vanilla/fs/ntfs/aops.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/aops.c --- linux-2.4.24-vanilla/fs/ntfs/aops.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/aops.c 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,1810 @@ +/** + * aops.c - NTFS kernel address space operations and page cache handling. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "ntfs.h"
+
+/**
+ * ntfs_end_buffer_async_read - async io completion for reading attributes
+ * @bh:		buffer head on which io is completed
+ * @uptodate:	whether @bh is now uptodate or not
+ *
+ * Asynchronous I/O completion handler for reading pages belonging to the
+ * attribute address space of an inode. The inodes can either be files or
+ * directories or they can be fake inodes describing some attribute.
+ *
+ * If NInoMstProtected(), perform the post read mst fixups when all IO on the
+ * page has been completed and mark the page uptodate or set the error bit on
+ * the page. To determine the size of the records that need fixing up, we cheat
+ * a little bit by setting itype.index.block_size in the ntfs_inode to the ntfs
+ * record size, and itype.index.block_size_bits to the log(base 2) of the ntfs
+ * record size.
+ */
+static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
+{
+	static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
+	unsigned long flags;
+	struct buffer_head *tmp;
+	struct page *page;
+	ntfs_inode *ni;
+	int page_uptodate = 1;
+
+	page = bh->b_page;
+	ni = NTFS_I(page->mapping->host);
+
+	if (likely(uptodate)) {
+		s64 file_ofs;
+
+		set_buffer_uptodate(bh);
+		/* Shift in 64 bits; page->index is only unsigned long. */
+		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
+		/* Check for the current buffer head overflowing. */
+		if (file_ofs + bh->b_size > ni->initialized_size) {
+			char *addr;
+			int ofs = 0;
+
+			if (file_ofs < ni->initialized_size)
+				ofs = ni->initialized_size - file_ofs;
+			addr = kmap_atomic(page, KM_BIO_IRQ);
+			memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
+			flush_dcache_page(page);
+			kunmap_atomic(addr, KM_BIO_IRQ);
+		}
+	} else {
+		clear_buffer_uptodate(bh);
+		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %Lu.",
+				(unsigned long long)bh->b_blocknr);
+		SetPageError(page);
+	}
+
+	spin_lock_irqsave(&page_uptodate_lock, flags);
+	mark_buffer_async(bh, 0);
+	unlock_buffer(bh);
+	tmp = bh;
+	do {
+		if (!buffer_uptodate(tmp))
+			page_uptodate = 0;
+		if (buffer_async(tmp)) {
+			if (likely(buffer_locked(tmp)))
+				goto still_busy;
+			/* Async buffers must be locked. */
+			BUG();
+		}
+		tmp = tmp->b_this_page;
+	} while (tmp != bh);
+	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	/*
+	 * If none of the buffers had errors then we can set the page uptodate,
+	 * but we first have to perform the post read mst fixups, if the
+	 * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
+	 */
+	if (!NInoMstProtected(ni)) {
+		if (likely(page_uptodate && !PageError(page)))
+			SetPageUptodate(page);
+	} else {
+		char *addr;
+		unsigned int i, recs, nr_err;
+		u32 rec_size;
+
+		rec_size = ni->itype.index.block_size;
+		recs = PAGE_CACHE_SIZE / rec_size;
+		addr = kmap_atomic(page, KM_BIO_IRQ);
+		for (i = nr_err = 0; i < recs; i++) {
+			if (likely(!post_read_mst_fixup((NTFS_RECORD*)(addr +
+					i * rec_size), rec_size)))
+				continue;
+			nr_err++;
+			ntfs_error(ni->vol->sb, "post_read_mst_fixup() failed, "
+					"corrupt %s record 0x%Lx. Run chkdsk.",
+					ni->mft_no ? "index" : "mft",
+					(long long)(((s64)page->index <<
+					PAGE_CACHE_SHIFT >>
+					ni->itype.index.block_size_bits) + i));
+		}
+		flush_dcache_page(page);
+		kunmap_atomic(addr, KM_BIO_IRQ);
+		if (likely(!PageError(page))) {
+			if (likely(!nr_err && recs)) {
+				if (likely(page_uptodate))
+					SetPageUptodate(page);
+			} else {
+				ntfs_error(ni->vol->sb, "Setting page error, "
+						"index 0x%lx.", page->index);
+				SetPageError(page);
+			}
+		}
+	}
+	unlock_page(page);
+	return;
+still_busy:
+	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	return;
+}
+
+/**
+ * ntfs_read_block - fill a @page of an address space with data
+ * @page:	page cache page to fill with data
+ *
+ * Fill the page @page of the address space belonging to the
+ * @page->mapping->host inode. We read each buffer asynchronously and when all
+ * buffers are read in, our io completion handler ntfs_end_buffer_async_read(),
+ * if required, automatically applies the mst fixups to the page before finally
+ * marking it uptodate and unlocking it.
+ *
+ * We only enforce allocated_size limit because i_size is checked for in
+ * generic_file_read().
+ *
+ * Return 0 on success and -errno on error.
+ *
+ * Contains an adapted version of fs/buffer.c::block_read_full_page().
+ */
+static int ntfs_read_block(struct page *page)
+{
+	VCN vcn;
+	LCN lcn;
+	ntfs_inode *ni;
+	ntfs_volume *vol;
+	run_list_element *rl;
+	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	sector_t iblock, lblock, zblock;
+	unsigned int blocksize, vcn_ofs;
+	int i, nr;
+	unsigned char blocksize_bits;
+
+	ni = NTFS_I(page->mapping->host);
+	vol = ni->vol;
+
+	blocksize_bits = VFS_I(ni)->i_blkbits;
+	blocksize = 1 << blocksize_bits;
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, VFS_I(ni)->i_dev, blocksize);
+	bh = head = page_buffers(page);
+	if (unlikely(!bh)) {
+		unlock_page(page);
+		return -ENOMEM;
+	}
+
+	iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
+	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
+
+#ifdef DEBUG
+	if (unlikely(!ni->run_list.rl && !ni->mft_no && !NInoAttr(ni)))
+		panic("NTFS: $MFT/$DATA run list has been unmapped! This is a "
+				"very serious bug! Cannot continue...");
+#endif
+
+	/* Loop through all the buffers in the page. */
+	rl = NULL;
+	nr = i = 0;
+	do {
+		if (unlikely(buffer_uptodate(bh)))
+			continue;
+		if (unlikely(buffer_mapped(bh))) {
+			arr[nr++] = bh;
+			continue;
+		}
+		bh->b_dev = VFS_I(ni)->i_dev;
+		/* Is the block within the allowed limits? */
+		if (iblock < lblock) {
+			BOOL is_retry = FALSE;
+
+			/* Convert iblock into corresponding vcn and offset. */
+			vcn = (VCN)iblock << blocksize_bits >>
+					vol->cluster_size_bits;
+			vcn_ofs = ((VCN)iblock << blocksize_bits) &
+					vol->cluster_size_mask;
+			if (!rl) {
+lock_retry_remap:
+				down_read(&ni->run_list.lock);
+				rl = ni->run_list.rl;
+			}
+			if (likely(rl != NULL)) {
+				/* Seek to element containing target vcn. */
+				while (rl->length && rl[1].vcn <= vcn)
+					rl++;
+				lcn = vcn_to_lcn(rl, vcn);
+			} else
+				lcn = (LCN)LCN_RL_NOT_MAPPED;
+			/* Successful remap. */
+			if (lcn >= 0) {
+				/* Setup buffer head to correct block. */
+				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
+						+ vcn_ofs) >> blocksize_bits;
+				set_buffer_mapped(bh);
+				/* Only read initialized data blocks. */
+				if (iblock < zblock) {
+					arr[nr++] = bh;
+					continue;
+				}
+				/* Fully non-initialized data block, zero it. */
+				goto handle_zblock;
+			}
+			/* It is a hole, need to zero it. */
+			if (lcn == LCN_HOLE)
+				goto handle_hole;
+			/* If first try and run list unmapped, map and retry. */
+			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
+				is_retry = TRUE;
+				/*
+				 * Attempt to map run list, dropping lock for
+				 * the duration.
+				 */
+				up_read(&ni->run_list.lock);
+				if (!map_run_list(ni, vcn))
+					goto lock_retry_remap;
+				rl = NULL;
+			}
+			/* Hard error, zero out region. */
+			SetPageError(page);
+			ntfs_error(vol->sb, "vcn_to_lcn(vcn = 0x%Lx) failed "
+					"with error code 0x%Lx%s.",
+					(long long)vcn, (long long)-lcn,
+					is_retry ? " even after retrying" : "");
+			// FIXME: Depending on vol->on_errors, do something.
+		}
+		/*
+		 * Either iblock was outside lblock limits or vcn_to_lcn()
+		 * returned error. Just zero that portion of the page and set
+		 * the buffer uptodate.
+		 */
+handle_hole:
+		bh->b_blocknr = -1UL;
+		clear_buffer_mapped(bh);
+handle_zblock:
+		memset(kmap(page) + i * blocksize, 0, blocksize);
+		flush_dcache_page(page);
+		kunmap(page);
+		set_buffer_uptodate(bh);
+	} while (i++, iblock++, (bh = bh->b_this_page) != head);
+
+	/* Release the lock if we took it. */
+	if (rl)
+		up_read(&ni->run_list.lock);
+
+	/* Check we have at least one buffer ready for i/o. */
+	if (nr) {
+		struct buffer_head *tbh;
+
+		/* Lock the buffers. */
+		for (i = 0; i < nr; i++) {
+			tbh = arr[i];
+			lock_buffer(tbh);
+			tbh->b_end_io = ntfs_end_buffer_async_read;
+			set_buffer_async(tbh);
+		}
+		/* Finally, start i/o on the buffers. */
+		for (i = 0; i < nr; i++) {
+			tbh = arr[i];
+			if (likely(!buffer_uptodate(tbh)))
+				submit_bh(READ, tbh);
+			else
+				ntfs_end_buffer_async_read(tbh, 1);
+		}
+		return 0;
+	}
+	/* No i/o was scheduled on any of the buffers. */
+	if (likely(!PageError(page)))
+		SetPageUptodate(page);
+	else /* Signal synchronous i/o error. */
+		nr = -EIO;
+	unlock_page(page);
+	return nr;
+}
+
+/**
+ * ntfs_readpage - fill a @page of a @file with data from the device
+ * @file:	open file to which the page @page belongs or NULL
+ * @page:	page cache page to fill with data
+ *
+ * For non-resident attributes, ntfs_readpage() fills the @page of the open
+ * file @file by calling the ntfs version of the generic block_read_full_page()
+ * function, ntfs_read_block(), which in turn creates and reads in the buffers
+ * associated with the page asynchronously.
+ *
+ * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
+ * data from the mft record (which at this stage is most likely in memory) and
+ * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
+ * even if the mft record is not cached at this point in time, we need to wait
+ * for it to be read in before we can do the copy.
+ *
+ * Return 0 on success and -errno on error.
+ *
+ * WARNING: Do not make this function static! It is used by mft.c!
+ */
+int ntfs_readpage(struct file *file, struct page *page)
+{
+	s64 attr_pos;
+	ntfs_inode *ni, *base_ni;
+	char *addr;
+	attr_search_context *ctx;
+	MFT_RECORD *mrec;
+	u32 attr_len;
+	int err = 0;
+
+	BUG_ON(!PageLocked(page));
+
+	/*
+	 * This can potentially happen because we clear PageUptodate() during
+	 * ntfs_writepage() of MstProtected() attributes.
+	 */
+	if (PageUptodate(page)) {
+		unlock_page(page);
+		return 0;
+	}
+
+	ni = NTFS_I(page->mapping->host);
+
+	if (NInoNonResident(ni)) {
+		/*
+		 * Only unnamed $DATA attributes can be compressed or
+		 * encrypted.
+		 */
+		if (ni->type == AT_DATA && !ni->name_len) {
+			/* If file is encrypted, deny access, just like NT4. */
+			if (NInoEncrypted(ni)) {
+				err = -EACCES;
+				goto err_out;
+			}
+			/* Compressed data streams are handled in compress.c. */
+			if (NInoCompressed(ni))
+				return ntfs_read_compressed_block(page);
+		}
+		/* Normal data stream. */
+		return ntfs_read_block(page);
+	}
+	/* Attribute is resident, implying it is not compressed or encrypted. */
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+
+	/* Map, pin, and lock the mft record. */
+	mrec = map_mft_record(base_ni);
+	if (unlikely(IS_ERR(mrec))) {
+		err = PTR_ERR(mrec);
+		goto err_out;
+	}
+	ctx = get_attr_search_ctx(base_ni, mrec);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto unm_err_out;
+	}
+	if (unlikely(!lookup_attr(ni->type, ni->name, ni->name_len,
+			IGNORE_CASE, 0, NULL, 0, ctx))) {
+		err = -ENOENT;
+		goto put_unm_err_out;
+	}
+
+	/* Page's byte offset in the attribute value (64-bit arithmetic). */
+	attr_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+
+	/* The total length of the attribute value. */
+	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
+
+	addr = kmap(page);
+	/* Copy over in bounds data, zeroing the remainder of the page. */
+	if (attr_pos < attr_len) {
+		u32 bytes = attr_len - attr_pos;
+		if (bytes > PAGE_CACHE_SIZE)
+			bytes = PAGE_CACHE_SIZE;
+		else if (bytes < PAGE_CACHE_SIZE)
+			memset(addr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+		/* Copy the data to the page. */
+		memcpy(addr, attr_pos + (char*)ctx->attr +
+				le16_to_cpu(
+				ctx->attr->data.resident.value_offset), bytes);
+	} else
+		memset(addr, 0, PAGE_CACHE_SIZE);
+	flush_dcache_page(page);
+	kunmap(page);
+
+	SetPageUptodate(page);
+put_unm_err_out:
+	put_attr_search_ctx(ctx);
+unm_err_out:
+	unmap_mft_record(base_ni);
+err_out:
+	unlock_page(page);
+	return err;
+}
+
+#ifdef NTFS_RW
+
+/**
+ * ntfs_write_block - write a @page to the backing store
+ * @page:	page cache page to write out
+ *
+ * This function is for writing pages belonging to non-resident, non-mst
+ * protected attributes to their backing store.
+ *
+ * For a page with buffers, map and write the dirty buffers asynchronously
+ * under page writeback.
For a page without buffers, create buffers for the
+ * page, then proceed as above.
+ *
+ * If a page doesn't have buffers the page dirty state is definitive. If a page
+ * does have buffers, the page dirty state is just a hint, and the buffer dirty
+ * state is definitive. (A hint which has rules: dirty buffers against a clean
+ * page is illegal. Other combinations are legal and need to be handled. In
+ * particular a dirty page containing clean buffers for example.)
+ *
+ * Note the above statement is true only on 2.5 kernels. In 2.4 kernels things
+ * are very different and the above statement becomes:
+ *	The page state is _always_ definitive. Partially dirty pages do not
+ * exist. We have to mark all the buffers dirty ourselves rather than checking
+ * them for being dirty.
+ *
+ * Return 0 on success and -errno on error.
+ *
+ * Based on ntfs_read_block() and __block_write_full_page().
+ */
+static int ntfs_write_block(struct page *page)
+{
+	VCN vcn;
+	LCN lcn;
+	sector_t block, dblock, iblock;
+	struct inode *vi;
+	ntfs_inode *ni;
+	ntfs_volume *vol;
+	run_list_element *rl;
+	struct buffer_head *bh, *head;
+	unsigned int blocksize, vcn_ofs;
+	int err;
+	BOOL need_end_writeback;
+	unsigned char blocksize_bits;
+
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
+	vol = ni->vol;
+
+	ntfs_debug("Entering for inode %li, attribute type 0x%x, page index "
+			"0x%lx.\n", vi->i_ino, ni->type, page->index);
+
+	BUG_ON(!NInoNonResident(ni));
+	BUG_ON(NInoMstProtected(ni));
+	BUG_ON(!PageUptodate(page));
+
+	blocksize_bits = vi->i_blkbits;
+	blocksize = 1 << blocksize_bits;
+
+	/*
+	 * We create buffers if no buffers exist. In 2.4 buffers are created
+	 * without any state. So we just mark all the buffers dirty and
+	 * uptodate. This simulates the situation in 2.5 kernels where all
+	 * buffers that need writing out are both dirty and uptodate and thus
+	 * allows us to use the 2.5 function from here on. This is a bit of a
+	 * kludge but it should work and is certainly the most minimally
+	 * invasive solution.
+	 */
+	if (!page_has_buffers(page)) {
+		create_empty_buffers(page, vi->i_dev, blocksize);
+		if (unlikely(!page_has_buffers(page))) {
+			ntfs_warning(vol->sb, "Error allocating page buffers. "
+					"Redirtying page so we try again "
+					"later.");
+			/*
+			 * Put the page back on mapping->dirty_pages, but leave
+			 * its buffer's dirty state as-is.
+			 */
+			// FIXME: Once Andrew's -EAGAIN patch goes in, remove
+			// the set_page_dirty(page) call below and return
+			// -EAGAIN instead of zero.
+			set_page_dirty(page);
+			unlock_page(page);
+			return 0;
+		}
+	}
+	bh = head = page_buffers(page);
+	do {
+		set_buffer_uptodate(bh);
+		if (test_set_buffer_dirty(bh))
+			ntfs_warning(vol->sb, "Dirty buffer found!");
+	} while ((bh = bh->b_this_page) != head);
+
+	/* NOTE: Different naming scheme to ntfs_read_block()! */
+
+	/* The first block in the page. */
+	block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+
+	/* The first out of bounds block for the data size. */
+	dblock = (vi->i_size + blocksize - 1) >> blocksize_bits;
+
+	/* The last (fully or partially) initialized block. */
+	iblock = ni->initialized_size >> blocksize_bits;
+
+	/*
+	 * Be very careful. We have no exclusion from __set_page_dirty_buffers
+	 * here, and the (potentially unmapped) buffers may become dirty at
+	 * any time. If a buffer becomes dirty here after we've inspected it
+	 * then we just miss that fact, and the page stays dirty.
+	 *
+	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
+	 * handle that here by just cleaning them.
+	 */
+
+	/*
+	 * Loop through all the buffers in the page, mapping all the dirty
+	 * buffers to disk addresses and handling any aliases from the
+	 * underlying block device's mapping.
+	 */
+	rl = NULL;
+	err = 0;
+	do {
+		BOOL is_retry = FALSE;
+
+		if (unlikely(block >= dblock)) {
+			/*
+			 * Mapped buffers outside i_size will occur, because
+			 * this page can be outside i_size when there is a
+			 * truncate in progress. The contents of such buffers
+			 * were zeroed by ntfs_writepage().
+			 *
+			 * FIXME: What about the small race window where
+			 * ntfs_writepage() has not done any clearing because
+			 * the page was within i_size but before we get here,
+			 * vmtruncate() modifies i_size?
+			 */
+			clear_buffer_dirty(bh);
+			set_buffer_uptodate(bh);
+			continue;
+		}
+
+		/* Clean buffers are not written out, so no need to map them. */
+		if (!buffer_dirty(bh))
+			continue;
+
+		/* Make sure we have enough initialized size. */
+		if (unlikely((block >= iblock) &&
+				(ni->initialized_size < vi->i_size))) {
+			/*
+			 * If this page is fully outside initialized size, zero
+			 * out all pages between the current initialized size
+			 * and the current page. Just use ntfs_readpage() to do
+			 * the zeroing transparently.
+			 */
+			if (block > iblock) {
+				// TODO:
+				// For each page do:
+				// - read_cache_page()
+				// Again for each page do:
+				// - wait_on_page_locked()
+				// - Check (PageUptodate(page) &&
+				//			!PageError(page))
+				// Update initialized size in the attribute and
+				// in the inode.
+				// Again, for each page do:
+				//	__set_page_dirty_buffers();
+				// page_cache_release()
+				// We don't need to wait on the writes.
+				// Update iblock.
+			}
+			/*
+			 * The current page straddles initialized size. Zero
+			 * all non-uptodate buffers and set them uptodate (and
+			 * dirty?). Note, there aren't any non-uptodate buffers
+			 * if the page is uptodate.
+			 * FIXME: For an uptodate page, the buffers may need to
+			 * be written out because they were not initialized on
+			 * disk before.
+			 */
+			if (!PageUptodate(page)) {
+				// TODO:
+				// Zero any non-uptodate buffers up to i_size.
+				// Set them uptodate and dirty.
+			}
+			// TODO:
+			// Update initialized size in the attribute and in the
+			// inode (up to i_size).
+			// Update iblock.
+			// FIXME: This is inefficient. Try to batch the two
+			// size changes to happen in one go.
+			ntfs_error(vol->sb, "Writing beyond initialized size "
+					"is not supported yet. Sorry.");
+			err = -EOPNOTSUPP;
+			break;
+			// Do NOT set_buffer_new() BUT DO clear buffer range
+			// outside write request range.
+			// set_buffer_uptodate() on complete buffers as well as
+			// set_buffer_dirty().
+		}
+
+		/* No need to map buffers that are already mapped. */
+		if (buffer_mapped(bh))
+			continue;
+
+		/* Unmapped, dirty buffer. Need to map it. */
+		bh->b_dev = VFS_I(ni)->i_dev;
+
+		/* Convert block into corresponding vcn and offset. */
+		vcn = (VCN)block << blocksize_bits >> vol->cluster_size_bits;
+		vcn_ofs = ((VCN)block << blocksize_bits) &
+				vol->cluster_size_mask;
+		if (!rl) {
+lock_retry_remap:
+			down_read(&ni->run_list.lock);
+			rl = ni->run_list.rl;
+		}
+		if (likely(rl != NULL)) {
+			/* Seek to element containing target vcn. */
+			while (rl->length && rl[1].vcn <= vcn)
+				rl++;
+			lcn = vcn_to_lcn(rl, vcn);
+		} else
+			lcn = (LCN)LCN_RL_NOT_MAPPED;
+		/* Successful remap. */
+		if (lcn >= 0) {
+			/* Setup buffer head to point to correct block. */
+			bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
+					vcn_ofs) >> blocksize_bits;
+			set_buffer_mapped(bh);
+			continue;
+		}
+		/* It is a hole, need to instantiate it. */
+		if (lcn == LCN_HOLE) {
+			// TODO: Instantiate the hole.
+			// clear_buffer_new(bh);
+			// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+			ntfs_error(vol->sb, "Writing into sparse regions is "
+					"not supported yet. Sorry.");
+			err = -EOPNOTSUPP;
+			break;
+		}
+		/* If first try and run list unmapped, map and retry. */
+		if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
+			is_retry = TRUE;
+			/*
+			 * Attempt to map run list, dropping lock for
+			 * the duration.
+			 */
+			up_read(&ni->run_list.lock);
+			err = map_run_list(ni, vcn);
+			if (likely(!err))
+				goto lock_retry_remap;
+			rl = NULL;
+		}
+		/* Failed to map the buffer, even after retrying. */
+		bh->b_blocknr = -1UL;
+		ntfs_error(vol->sb, "vcn_to_lcn(vcn = 0x%Lx) failed "
+				"with error code 0x%Lx%s.",
+				(long long)vcn, (long long)-lcn,
+				is_retry ? " even after retrying" : "");
+		// FIXME: Depending on vol->on_errors, do something.
+		if (!err)
+			err = -EIO;
+		break;
+	} while (block++, (bh = bh->b_this_page) != head);
+
+	/* Release the lock if we took it. */
+	if (rl)
+		up_read(&ni->run_list.lock);
+
+	/* For the error case, need to reset bh to the beginning. */
+	bh = head;
+
+	/* Just an optimization, so ->readpage() isn't called later. */
+	if (unlikely(!PageUptodate(page))) {
+		int uptodate = 1;
+		do {
+			if (!buffer_uptodate(bh)) {
+				uptodate = 0;
+				bh = head;
+				break;
+			}
+		} while ((bh = bh->b_this_page) != head);
+		if (uptodate)
+			SetPageUptodate(page);
+	}
+
+	/* Setup all mapped, dirty buffers for async write i/o. */
+	do {
+		get_bh(bh);
+		if (buffer_mapped(bh) && buffer_dirty(bh)) {
+			lock_buffer(bh);
+			if (test_clear_buffer_dirty(bh)) {
+				BUG_ON(!buffer_uptodate(bh));
+				set_buffer_async_io(bh);
+			} else
+				unlock_buffer(bh);
+		} else if (unlikely(err)) {
+			/*
+			 * For the error case. The buffer may have been set
+			 * dirty during attachment to a dirty page.
+			 */
+			if (err != -ENOMEM)
+				clear_buffer_dirty(bh);
+		}
+	} while ((bh = bh->b_this_page) != head);
+
+	if (unlikely(err)) {
+		// TODO: Remove the -EOPNOTSUPP check later on...
+		if (unlikely(err == -EOPNOTSUPP))
+			err = 0;
+		else if (err == -ENOMEM) {
+			ntfs_warning(vol->sb, "Error allocating memory. "
+					"Redirtying page so we try again "
+					"later.");
+			/*
+			 * Put the page back on mapping->dirty_pages, but
+			 * leave its buffer's dirty state as-is.
+			 */
+			// FIXME: Once Andrew's -EAGAIN patch goes in, remove
+			// the set_page_dirty(page) call below and set err to
+			// -EAGAIN instead of zero.
+			set_page_dirty(page);
+			err = 0;
+		} else
+			SetPageError(page);
+	}
+
+	/*
+	 * Submit the prepared buffers for i/o. The page is still locked here.
+	 * NOTE(review): presumably the async write i/o completion handler
+	 * unlocks it, which could happen at any time after the *first*
+	 * submit_bh() - confirm. So the buffers can then disappear...
+	 */
+	need_end_writeback = TRUE;
+	do {
+		struct buffer_head *next = bh->b_this_page;
+		if (buffer_async(bh)) {
+			submit_bh(WRITE, bh);
+			need_end_writeback = FALSE;
+		}
+		put_bh(bh);
+		bh = next;
+	} while (bh != head);
+
+	/* If no i/o was started, we need to unlock the page ourselves. */
+	if (unlikely(need_end_writeback))
+		unlock_page(page);
+
+	ntfs_debug("Done.");
+	return err;
+}
+
+/**
+ * ntfs_writepage - write a @page to the backing store
+ * @page:	page cache page to write out
+ *
+ * For non-resident attributes, ntfs_writepage() writes the @page by calling
+ * the ntfs version of the generic block_write_full_page() function,
+ * ntfs_write_block(), which in turn if necessary creates and writes the
+ * buffers associated with the page asynchronously.
+ *
+ * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
+ * the data to the mft record (which at this stage is most likely in memory).
+ * Thus, in this case, I/O is synchronous, as even if the mft record is not
+ * cached at this point in time, we need to wait for it to be read in before we
+ * can do the copy.
+ *
+ * Note the caller clears the page dirty flag before calling ntfs_writepage().
+ *
+ * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int ntfs_writepage(struct page *page)
+{
+	s64 attr_pos;
+	struct inode *vi;
+	ntfs_inode *ni, *base_ni;
+	char *kaddr;
+	attr_search_context *ctx;
+	MFT_RECORD *m;
+	u32 attr_len, bytes;
+	int err;
+
+	BUG_ON(!PageLocked(page));
+
+	vi = page->mapping->host;
+
+	/* Is the page fully outside i_size?
(truncate in progress) */ + if (unlikely(page->index >= (vi->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) { + unlock_page(page); + ntfs_debug("Write outside i_size. Returning i/o error."); + return -EIO; + } + + ni = NTFS_I(vi); + + if (NInoNonResident(ni)) { + /* + * Only unnamed $DATA attributes can be compressed, encrypted, + * and/or sparse. + */ + if (ni->type == AT_DATA && !ni->name_len) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + unlock_page(page); + ntfs_debug("Denying write access to encrypted " + "file."); + return -EACCES; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoCompressed(ni)) { + // TODO: Implement and replace this check with + // return ntfs_write_compressed_block(page); + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to compressed " + "files is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + // TODO: Implement and remove this check. + if (NInoSparse(ni)) { + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to sparse files " + "is not supported yet. Sorry."); + return -EOPNOTSUPP; + } + } + + /* We have to zero every time due to mmap-at-end-of-file. */ + if (page->index >= (vi->i_size >> PAGE_CACHE_SHIFT)) { + /* The page straddles i_size. */ + unsigned int ofs = vi->i_size & ~PAGE_CACHE_MASK; + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + + // TODO: Implement and remove this check. + if (NInoMstProtected(ni)) { + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to MST protected " + "attributes is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + + /* Normal data stream. */ + return ntfs_write_block(page); + } + + /* + * Attribute is resident, implying it is not compressed, encrypted, or + * mst protected. 
+ */ + BUG_ON(page_has_buffers(page)); + BUG_ON(!PageUptodate(page)); + + // TODO: Consider using PageWriteback() + unlock_page() in 2.5 once the + // "VM fiddling has ended". Note, don't forget to replace all the + // unlock_page() calls further below with end_page_writeback() ones. + // FIXME: Make sure it is ok to SetPageError() on unlocked page under + // writeback before doing the change! +#if 0 + SetPageWriteback(page); + unlock_page(page); +#endif + + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + + /* Map, pin, and lock the mft record. */ + m = map_mft_record(base_ni); + if (unlikely(IS_ERR(m))) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + ctx = get_attr_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + if (unlikely(!lookup_attr(ni->type, ni->name, ni->name_len, + IGNORE_CASE, 0, NULL, 0, ctx))) { + err = -ENOENT; + goto err_out; + } + + /* Starting position of the page within the attribute value. */ + attr_pos = page->index << PAGE_CACHE_SHIFT; + + /* The total length of the attribute value. */ + attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); + + if (unlikely(vi->i_size != attr_len)) { + ntfs_error(vi->i_sb, "BUG()! i_size (0x%Lx) doesn't match " + "attr_len (0x%x). Aborting write.", vi->i_size, + attr_len); + err = -EIO; + goto err_out; + } + if (unlikely(attr_pos >= attr_len)) { + ntfs_error(vi->i_sb, "BUG()! attr_pos (0x%Lx) > attr_len (0x%x)" + ". Aborting write.", attr_pos, attr_len); + err = -EIO; + goto err_out; + } + + bytes = attr_len - attr_pos; + if (unlikely(bytes > PAGE_CACHE_SIZE)) + bytes = PAGE_CACHE_SIZE; + + /* + * Here, we don't need to zero the out of bounds area everytime because + * the below memcpy() already takes care of the mmap-at-end-of-file + * requirements. 
If the file is converted to a non-resident one, then + * the code path use is switched to the non-resident one where the + * zeroing happens on each ntfs_writepage() invocation. + * + * The above also applies nicely when i_size is decreased. + * + * When i_size is increased, the memory between the old and new i_size + * _must_ be zeroed (or overwritten with new data). Otherwise we will + * expose data to userspace/disk which should never have been exposed. + * + * FIXME: Ensure that i_size increases do the zeroing/overwriting and + * if we cannot guarantee that, then enable the zeroing below. + */ + + kaddr = kmap_atomic(page, KM_USER0); + /* Copy the data from the page to the mft record. */ + memcpy((u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset) + attr_pos, + kaddr, bytes); + flush_dcache_mft_record_page(ctx->ntfs_ino); +#if 0 + /* Zero out of bounds area. */ + if (likely(bytes < PAGE_CACHE_SIZE)) { + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + flush_dcache_page(page); + } +#endif + kunmap_atomic(kaddr, KM_USER0); + + unlock_page(page); + + // TODO: Mark mft record dirty so it gets written back. + ntfs_error(vi->i_sb, "Writing to resident files is not supported yet. " + "Wrote to memory only..."); + + put_attr_search_ctx(ctx); + unmap_mft_record(base_ni); + return 0; +err_out: + if (err == -ENOMEM) { + ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying " + "page so we try again later."); + /* + * Put the page back on mapping->dirty_pages, but leave its + * buffer's dirty state as-is. + */ + // FIXME: Once Andrew's -EAGAIN patch goes in, remove the + // __set_page_dirty_nobuffers(page) and set err to -EAGAIN + // instead of zero. + set_page_dirty(page); + err = 0; + } else { + ntfs_error(vi->i_sb, "Resident attribute write failed with " + "error %i. 
Setting page error flag.", -err); + SetPageError(page); + } + unlock_page(page); + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + return err; +} + +/** + * ntfs_prepare_nonresident_write - + * + */ +static int ntfs_prepare_nonresident_write(struct page *page, + unsigned from, unsigned to) +{ + VCN vcn; + LCN lcn; + sector_t block, ablock, iblock; + struct inode *vi; + ntfs_inode *ni; + ntfs_volume *vol; + run_list_element *rl; + struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; + char *kaddr = page_address(page); + unsigned int vcn_ofs, block_start, block_end, blocksize; + int err; + BOOL is_retry; + unsigned char blocksize_bits; + + vi = page->mapping->host; + ni = NTFS_I(vi); + vol = ni->vol; + + ntfs_debug("Entering for inode %li, attribute type 0x%x, page index " + "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, + page->index, from, to); + + BUG_ON(!NInoNonResident(ni)); + BUG_ON(NInoMstProtected(ni)); + + blocksize_bits = vi->i_blkbits; + blocksize = 1 << blocksize_bits; + + /* + * create_empty_buffers() will create uptodate/dirty buffers if the + * page is uptodate/dirty. + */ + if (!page_has_buffers(page)) + create_empty_buffers(page, vi->i_dev, blocksize); + bh = head = page_buffers(page); + if (unlikely(!bh)) + return -ENOMEM; + + /* The first block in the page. */ + block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + + /* + * The first out of bounds block for the allocated size. No need to + * round up as allocated_size is in multiples of cluster size and the + * minimum cluster size is 512 bytes, which is equal to the smallest + * blocksize. + */ + ablock = ni->allocated_size >> blocksize_bits; + + /* The last (fully or partially) initialized block. */ + iblock = ni->initialized_size >> blocksize_bits; + + /* Loop through all the buffers in the page. 
*/ + block_start = 0; + rl = NULL; + err = 0; + do { + block_end = block_start + blocksize; + /* + * If buffer @bh is outside the write, just mark it uptodate + * if the page is uptodate and continue with the next buffer. + */ + if (block_end <= from || block_start >= to) { + if (PageUptodate(page)) { + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + } + continue; + } + /* + * @bh is at least partially being written to. + * Make sure it is not marked as new. + */ + //if (buffer_new(bh)) + // clear_buffer_new(bh); + + if (block >= ablock) { + // TODO: block is above allocated_size, need to + // allocate it. Best done in one go to accommodate not + // only block but all above blocks up to and including: + // ((page->index << PAGE_CACHE_SHIFT) + to + blocksize + // - 1) >> blocksize_bits. Obviously will need to round + // up to next cluster boundary, too. This should be + // done with a helper function, so it can be reused. + ntfs_error(vol->sb, "Writing beyond allocated size " + "is not supported yet. Sorry."); + err = -EOPNOTSUPP; + goto err_out; + // Need to update ablock. + // Need to set_buffer_new() on all block bhs that are + // newly allocated. + } + /* + * Now we have enough allocated size to fulfill the whole + * request, i.e. block < ablock is true. + */ + if (unlikely((block >= iblock) && + (ni->initialized_size < vi->i_size))) { + /* + * If this page is fully outside initialized size, zero + * out all pages between the current initialized size + * and the current page. Just use ntfs_readpage() to do + * the zeroing transparently. + */ + if (block > iblock) { + // TODO: + // For each page do: + // - read_cache_page() + // Again for each page do: + // - wait_on_page_locked() + // - Check (PageUptodate(page) && + // !PageError(page)) + // Update initialized size in the attribute and + // in the inode. + // Again, for each page do: + // __set_page_dirty_buffers(); + // page_cache_release() + // We don't need to wait on the writes. + // Update iblock. 
+ } + /* + * The current page straddles initialized size. Zero + * all non-uptodate buffers and set them uptodate (and + * dirty?). Note, there aren't any non-uptodate buffers + * if the page is uptodate. + * FIXME: For an uptodate page, the buffers may need to + * be written out because they were not initialized on + * disk before. + */ + if (!PageUptodate(page)) { + // TODO: + // Zero any non-uptodate buffers up to i_size. + // Set them uptodate and dirty. + } + // TODO: + // Update initialized size in the attribute and in the + // inode (up to i_size). + // Update iblock. + // FIXME: This is inefficient. Try to batch the two + // size changes to happen in one go. + ntfs_error(vol->sb, "Writing beyond initialized size " + "is not supported yet. Sorry."); + err = -EOPNOTSUPP; + goto err_out; + // Do NOT set_buffer_new() BUT DO clear buffer range + // outside write request range. + // set_buffer_uptodate() on complete buffers as well as + // set_buffer_dirty(). + } + + /* Need to map unmapped buffers. */ + if (!buffer_mapped(bh)) { + /* Unmapped buffer. Need to map it. */ + bh->b_dev = vi->i_dev; + + /* Convert block into corresponding vcn and offset. */ + vcn = (VCN)block << blocksize_bits >> + vol->cluster_size_bits; + vcn_ofs = ((VCN)block << blocksize_bits) & + vol->cluster_size_mask; + + is_retry = FALSE; + if (!rl) { +lock_retry_remap: + down_read(&ni->run_list.lock); + rl = ni->run_list.rl; + } + if (likely(rl != NULL)) { + /* Seek to element containing target vcn. */ + while (rl->length && rl[1].vcn <= vcn) + rl++; + lcn = vcn_to_lcn(rl, vcn); + } else + lcn = (LCN)LCN_RL_NOT_MAPPED; + if (unlikely(lcn < 0)) { + /* + * We extended the attribute allocation above. + * If we hit an ENOENT here it means that the + * allocation was insufficient which is a bug. + */ + BUG_ON(lcn == LCN_ENOENT); + + /* It is a hole, need to instantiate it. */ + if (lcn == LCN_HOLE) { + // TODO: Instantiate the hole. 
+ // clear_buffer_new(bh); + // unmap_underlying_metadata(bh->b_bdev, + // bh->b_blocknr); + // For non-uptodate buffers, need to + // zero out the region outside the + // request in this bh or all bhs, + // depending on what we implemented + // above. + // Need to flush_dcache_page(). + // Or could use set_buffer_new() + // instead? + ntfs_error(vol->sb, "Writing into " + "sparse regions is " + "not supported yet. " + "Sorry."); + err = -EOPNOTSUPP; + goto err_out; + } else if (!is_retry && + lcn == LCN_RL_NOT_MAPPED) { + is_retry = TRUE; + /* + * Attempt to map run list, dropping + * lock for the duration. + */ + up_read(&ni->run_list.lock); + err = map_run_list(ni, vcn); + if (likely(!err)) + goto lock_retry_remap; + rl = NULL; + } + /* + * Failed to map the buffer, even after + * retrying. + */ + bh->b_blocknr = -1UL; + ntfs_error(vol->sb, "vcn_to_lcn(vcn = 0x%Lx) " + "failed with error code " + "0x%Lx%s.", (long long)vcn, + (long long)-lcn, is_retry ? + " even after retrying" : ""); + // FIXME: Depending on vol->on_errors, do + // something. + if (!err) + err = -EIO; + goto err_out; + } + /* We now have a successful remap, i.e. lcn >= 0. */ + + /* Setup buffer head to correct block. */ + bh->b_blocknr = ((lcn << vol->cluster_size_bits) + + vcn_ofs) >> blocksize_bits; + set_buffer_mapped(bh); + + // FIXME: Something analogous to this is needed for + // each newly allocated block, i.e. BH_New. + // FIXME: Might need to take this out of the + // if (!buffer_mapped(bh)) {}, depending on how we + // implement things during the allocated_size and + // initialized_size extension code above. + if (buffer_new(bh)) { + clear_buffer_new(bh); + // FIXME: In 2.4.x kernels, need to export + // unmap_underlying_metadata() and then need to + // uncomment the next line. For now it doesn't + // matter as we never get here... 
(AIA) + // unmap_underlying_metadata(bh); + if (PageUptodate(page)) { + set_buffer_uptodate(bh); + continue; + } + /* + * Page is _not_ uptodate, zero surrounding + * region. NOTE: This is how we decide if to + * zero or not! + */ + if (block_end > to) + memset(kaddr + to, 0, block_end - to); + if (block_start < from) + memset(kaddr + block_start, 0, + from - block_start); + if (block_end > to || block_start < from) + flush_dcache_page(page); + continue; + } + } + /* @bh is mapped, set it uptodate if the page is uptodate. */ + if (PageUptodate(page)) { + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + continue; + } + /* + * The page is not uptodate. The buffer is mapped. If it is not + * uptodate, and it is only partially being written to, we need + * to read the buffer in before the write, i.e. right now. + */ + if (!buffer_uptodate(bh) && + (block_start < from || block_end > to)) { + ll_rw_block(READ, 1, &bh); + *wait_bh++ = bh; + } + } while (block++, block_start = block_end, + (bh = bh->b_this_page) != head); + + /* Release the lock if we took it. */ + if (rl) { + up_read(&ni->run_list.lock); + rl = NULL; + } + + /* If we issued read requests, let them complete. */ + while (wait_bh > wait) { + wait_on_buffer(*--wait_bh); + if (!buffer_uptodate(*wait_bh)) + return -EIO; + } + + ntfs_debug("Done."); + return 0; +err_out: + /* + * Zero out any newly allocated blocks to avoid exposing stale data. + * If BH_New is set, we know that the block was newly allocated in the + * above loop. + * FIXME: What about initialized_size increments? Have we done all the + * required zeroing above? If not this error handling is broken, and + * in particular the if (block_end <= from) check is completely bogus. 
+ */ + bh = head; + block_start = 0; + is_retry = FALSE; + do { + block_end = block_start + blocksize; + if (block_end <= from) + continue; + if (block_start >= to) + break; + if (buffer_new(bh)) { + clear_buffer_new(bh); + if (buffer_uptodate(bh)) + ntfs_error(vol->sb, "Zeroing uptodate buffer!"); + memset(kaddr + block_start, 0, bh->b_size); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + is_retry = TRUE; + } + } while (block_start = block_end, (bh = bh->b_this_page) != head); + if (is_retry) + flush_dcache_page(page); + if (rl) + up_read(&ni->run_list.lock); + return err; +} + +/** + * ntfs_prepare_write - prepare a page for receiving data + * + * This is called from generic_file_write() with i_sem held on the inode + * (@page->mapping->host). The @page is locked and kmap()ped so page_address() + * can simply be used. The source data has not yet been copied into the @page. + * + * Need to extend the attribute/fill in holes if necessary, create blocks and + * make partially overwritten blocks uptodate. + * + * i_size is not to be modified yet. + * + * Return 0 on success or -errno on error. + * + * Should be using block_prepare_write() [support for sparse files] or + * cont_prepare_write() [no support for sparse files]. Can't do that due to + * ntfs specifics but can look at them for implementation guidance. + * + * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is + * the first byte in the page that will be written to and @to is the first byte + * after the last byte that will be written to. 
+ */ +static int ntfs_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *vi = page->mapping->host; + ntfs_inode *ni = NTFS_I(vi); + + ntfs_debug("Entering for inode %li, attribute type 0x%x, page index " + "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, + page->index, from, to); + + BUG_ON(!PageLocked(page)); + BUG_ON(from > PAGE_CACHE_SIZE); + BUG_ON(to > PAGE_CACHE_SIZE); + BUG_ON(from > to); + + if (NInoNonResident(ni)) { + /* + * Only unnamed $DATA attributes can be compressed, encrypted, + * and/or sparse. + */ + if (ni->type == AT_DATA && !ni->name_len) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + ntfs_debug("Denying write access to encrypted " + "file."); + return -EACCES; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoCompressed(ni)) { + // TODO: Implement and replace this check with + // return ntfs_write_compressed_block(page); + ntfs_error(vi->i_sb, "Writing to compressed " + "files is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + // TODO: Implement and remove this check. + if (NInoSparse(ni)) { + ntfs_error(vi->i_sb, "Writing to sparse files " + "is not supported yet. Sorry."); + return -EOPNOTSUPP; + } + } + + // TODO: Implement and remove this check. + if (NInoMstProtected(ni)) { + ntfs_error(vi->i_sb, "Writing to MST protected " + "attributes is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + + /* Normal data stream. */ + return ntfs_prepare_nonresident_write(page, from, to); + } + + /* + * Attribute is resident, implying it is not compressed, encrypted, or + * mst protected. + */ + BUG_ON(page_has_buffers(page)); + + /* Do we need to resize the attribute? */ + if (((s64)page->index << PAGE_CACHE_SHIFT) + to > vi->i_size) { + // TODO: Implement resize... + ntfs_error(vi->i_sb, "Writing beyond the existing file size is " + "not supported yet. 
Sorry."); + return -EOPNOTSUPP; + } + + /* + * Because resident attributes are handled by memcpy() to/from the + * corresponding MFT record, and because this form of i/o is byte + * aligned rather than block aligned, there is no need to bring the + * page uptodate here as in the non-resident case where we need to + * bring the buffers straddled by the write uptodate before + * generic_file_write() does the copying from userspace. + * + * We thus defer the uptodate bringing of the page region outside the + * region written to to ntfs_commit_write(). The reason for doing this + * is that we save one round of: + * map_mft_record(), get_attr_search_ctx(), lookup_attr(), + * kmap_atomic(), kunmap_atomic(), put_attr_search_ctx(), + * unmap_mft_record(). + * Which is obviously a very worthwhile save. + * + * Thus we just return success now... + */ + ntfs_debug("Done."); + return 0; +} + +/* + * NOTES: There is a disparity between the apparent need to extend the + * attribute in prepare write but to update i_size only in commit write. + * Need to make sure i_sem protection is sufficient. And if not will need to + * handle this in some way or another. + */ + +/** + * ntfs_commit_nonresident_write - + * + */ +static int ntfs_commit_nonresident_write(struct page *page, + unsigned from, unsigned to) +{ + s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to; + struct inode *vi; + struct buffer_head *bh, *head; + unsigned int block_start, block_end, blocksize; + BOOL partial, need_balance_dirty; + + vi = page->mapping->host; + + ntfs_debug("Entering for inode %li, attribute type 0x%x, page index " + "0x%lx, from = %u, to = %u.", vi->i_ino, + NTFS_I(vi)->type, page->index, from, to); + + blocksize = 1 << vi->i_blkbits; + + // FIXME: We need a whole slew of special cases in here for MST + // protected attributes for example. For compressed files, too... 
+ // For now, we know ntfs_prepare_write() would have failed so we can't + // get here in any of the cases which we have to special case, so we + // are just a ripped off unrolled generic_commit_write() at present. + + bh = head = page_buffers(page); + block_start = 0; + partial = need_balance_dirty = FALSE; + do { + block_end = block_start + blocksize; + if (block_end <= from || block_start >= to) { + if (!buffer_uptodate(bh)) + partial = TRUE; + } else { + set_buffer_uptodate(bh); + if (!atomic_set_buffer_dirty(bh)) { + __mark_dirty(bh); + buffer_insert_inode_data_queue(bh, vi); + need_balance_dirty = TRUE; + } + } + } while (block_start = block_end, (bh = bh->b_this_page) != head); + + if (need_balance_dirty) + balance_dirty(); + /* + * If this is a partial write which happened to make all buffers + * uptodate then we can optimize away a bogus ->readpage() for the next + * read(). Here we 'discover' whether the page went uptodate as a + * result of this (potentially partial) write. + */ + if (!partial) + SetPageUptodate(page); + + /* + * Not convinced about this at all. See disparity comment above. For + * now we know ntfs_prepare_write() would have failed in the write + * exceeds i_size case, so this will never trigger which is fine. + */ + if (pos > vi->i_size) { + ntfs_error(vi->i_sb, "Writing beyond the existing file size is " + "not supported yet. Sorry."); + return -EOPNOTSUPP; + // vi->i_size = pos; + // mark_inode_dirty(vi); + } + ntfs_debug("Done."); + return 0; +} + +/** + * ntfs_commit_write - commit the received data + * + * This is called from generic_file_write() with i_sem held on the inode + * (@page->mapping->host). The @page is locked and kmap()ped so page_address() + * can simply be used. The source data has already been copied into the @page. + * + * Need to mark modified blocks dirty so they get written out later when + * ntfs_writepage() is invoked by the VM. + * + * Return 0 on success or -errno on error. 
+ * + * Should be using generic_commit_write(). This marks buffers uptodate and + * dirty, sets the page uptodate if all buffers in the page are uptodate, and + * updates i_size if the end of io is beyond i_size. In that case, it also + * marks the inode dirty. - We could still use this (obviously except for + * NInoMstProtected() attributes, where we will need to duplicate the core code + * because we need our own async_io completion handler) but we could just do + * the i_size update in prepare write, when we resize the attribute. Then + * we would avoid the i_size update and mark_inode_dirty() happening here. + * + * Can't use generic_commit_write() due to ntfs specialities but can look at + * it for implementation guidance. + * + * If things have gone as outlined in ntfs_prepare_write(), then we do not + * need to do any page content modifications here at all, except in the write + * to resident attribute case, where we need to do the uptodate bringing here + * which we combine with the copying into the mft record which means we only + * need to map the mft record and find the attribute record in it only once. + */ +static int ntfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + s64 attr_pos; + struct inode *vi; + ntfs_inode *ni, *base_ni; + char *kaddr, *kattr; + attr_search_context *ctx; + MFT_RECORD *m; + u32 attr_len, bytes; + int err; + + vi = page->mapping->host; + ni = NTFS_I(vi); + + ntfs_debug("Entering for inode %li, attribute type 0x%x, page index " + "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, + page->index, from, to); + + if (NInoNonResident(ni)) { + /* + * Only unnamed $DATA attributes can be compressed, encrypted, + * and/or sparse. + */ + if (ni->type == AT_DATA && !ni->name_len) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + // Should never get here! 
+ ntfs_debug("Denying write access to encrypted " + "file."); + return -EACCES; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoCompressed(ni)) { + // TODO: Implement and replace this check with + // return ntfs_write_compressed_block(page); + // Should never get here! + ntfs_error(vi->i_sb, "Writing to compressed " + "files is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + // TODO: Implement and remove this check. + if (NInoSparse(ni)) { + // Should never get here! + ntfs_error(vi->i_sb, "Writing to sparse files " + "is not supported yet. Sorry."); + return -EOPNOTSUPP; + } + } + + // TODO: Implement and remove this check. + if (NInoMstProtected(ni)) { + // Should never get here! + ntfs_error(vi->i_sb, "Writing to MST protected " + "attributes is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + + /* Normal data stream. */ + return ntfs_commit_nonresident_write(page, from, to); + } + + /* + * Attribute is resident, implying it is not compressed, encrypted, or + * mst protected. + */ + + /* Do we need to resize the attribute? */ + if (((s64)page->index << PAGE_CACHE_SHIFT) + to > vi->i_size) { + // TODO: Implement resize... + // pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to; + // vi->i_size = pos; + // mark_inode_dirty(vi); + // Should never get here! + ntfs_error(vi->i_sb, "Writing beyond the existing file size is " + "not supported yet. Sorry."); + return -EOPNOTSUPP; + } + + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + + /* Map, pin, and lock the mft record. 
*/ + m = map_mft_record(base_ni); + if (unlikely(IS_ERR(m))) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + ctx = get_attr_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + if (unlikely(!lookup_attr(ni->type, ni->name, ni->name_len, + IGNORE_CASE, 0, NULL, 0, ctx))) { + err = -ENOENT; + goto err_out; + } + + /* Starting position of the page within the attribute value. */ + attr_pos = page->index << PAGE_CACHE_SHIFT; + + /* The total length of the attribute value. */ + attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); + + if (unlikely(vi->i_size != attr_len)) { + ntfs_error(vi->i_sb, "BUG()! i_size (0x%Lx) doesn't match " + "attr_len (0x%x). Aborting write.", vi->i_size, + attr_len); + err = -EIO; + goto err_out; + } + if (unlikely(attr_pos >= attr_len)) { + ntfs_error(vi->i_sb, "BUG()! attr_pos (0x%Lx) > attr_len (0x%x)" + ". Aborting write.", attr_pos, attr_len); + err = -EIO; + goto err_out; + } + + bytes = attr_len - attr_pos; + if (unlikely(bytes > PAGE_CACHE_SIZE)) + bytes = PAGE_CACHE_SIZE; + + /* + * Calculate the address of the attribute value corresponding to the + * beginning of the current data @page. + */ + kattr = (u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset) + attr_pos; + + kaddr = kmap_atomic(page, KM_USER0); + + /* Copy the received data from the page to the mft record. */ + memcpy(kattr + from, kaddr + from, to - from); + flush_dcache_mft_record_page(ctx->ntfs_ino); + + if (!PageUptodate(page)) { + /* + * Bring the out of bounds area(s) uptodate by copying data + * from the mft record to the page. + */ + if (from > 0) + memcpy(kaddr, kattr, from); + if (to < bytes) + memcpy(kaddr + to, kattr + to, bytes - to); + + /* Zero the region outside the end of the attribute value. 
*/ + if (likely(bytes < PAGE_CACHE_SIZE)) + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + + /* + * The probability of not having done any of the above is + * extremely small, so we just flush unconditionally. + */ + flush_dcache_page(page); + SetPageUptodate(page); + } + kunmap_atomic(kaddr, KM_USER0); + + // TODO: Mark mft record dirty so it gets written back. + ntfs_error(vi->i_sb, "Writing to resident files is not supported yet. " + "Wrote to memory only..."); + + put_attr_search_ctx(ctx); + unmap_mft_record(base_ni); + ntfs_debug("Done."); + return 0; +err_out: + if (err == -ENOMEM) { + ntfs_warning(vi->i_sb, "Error allocating memory required to " + "commit the write."); + if (PageUptodate(page)) { + ntfs_warning(vi->i_sb, "Page is uptodate, setting " + "dirty so the write will be retried " + "later on by the VM."); + /* + * Put the page on mapping->dirty_pages, but leave its + * buffer's dirty state as-is. + */ + set_page_dirty(page); + err = 0; + } else + ntfs_error(vi->i_sb, "Page is not uptodate. Written " + "data has been lost. )-:"); + } else { + ntfs_error(vi->i_sb, "Resident attribute write failed with " + "error %i. Setting page error flag.", -err); + SetPageError(page); + } + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + return err; +} + +#endif /* NTFS_RW */ + +/** + * ntfs_aops - general address space operations for inodes and attributes + */ +struct address_space_operations ntfs_aops = { + .readpage = ntfs_readpage, /* Fill page with data. */ + .sync_page = block_sync_page, /* Currently, just unplugs the + disk request queue. */ +#ifdef NTFS_RW + .writepage = ntfs_writepage, /* Write dirty page to disk. */ + .prepare_write = ntfs_prepare_write, /* Prepare page and buffers + ready to receive data. */ + .commit_write = ntfs_commit_write, /* Commit received data. 
*/ +#endif +}; + diff -urN linux-2.4.24-vanilla/fs/ntfs/attr.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/attr.c --- linux-2.4.24-vanilla/fs/ntfs/attr.c 2001-12-21 17:42:03.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/attr.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,872 +0,0 @@ -/* - * attr.c - * - * Copyright (C) 1996-1999 Martin von Löwis - * Copyright (C) 1996-1997 Régis Duchesne - * Copyright (C) 1998 Joseph Malicki - * Copyright (C) 1999 Steve Dodd - * Copyright (C) 2001 Anton Altaparmakov (AIA) - */ - -#include "ntfstypes.h" -#include "struct.h" -#include "attr.h" - -#include -#include -#include "macros.h" -#include "support.h" -#include "util.h" -#include "super.h" -#include "inode.h" -#include "unistr.h" - -/** - * ntfs_find_attr_in_mft_rec - find attribute in mft record - * @vol: volume on which attr resides - * @m: mft record to search - * @type: attribute type to find - * @name: attribute name to find (optional, i.e. NULL means don't care) - * @name_len: attribute name length (only needed if @name present) - * @ic: ignore case if 1 or case sensitive if 0 (ignored if @name NULL) - * @instance: instance number to find - * - * Only search the specified mft record and it ignores the presence of an - * attribute list attribute (unless it is the one being searched for, - * obviously, in which case it is returned). - */ -ntfs_u8* ntfs_find_attr_in_mft_rec(ntfs_volume *vol, ntfs_u8 *m, __u32 type, - wchar_t *name, __u32 name_len, int ic, __u16 instance) -{ - ntfs_u8 *a; - - /* Iterate over attributes in mft record @m. */ - a = m + NTFS_GETU16(m + 20); /* attrs_offset */ - for (; a >= m && a <= m + vol->mft_record_size; - a += NTFS_GETU32(a + 4 /* length */)) { - /* We catch $END with this more general check, too... */ - if (NTFS_GETU32(a + 0 /* type */) > type) - return NULL; - if (!NTFS_GETU32(a + 4 /* length */)) - break; - if (NTFS_GETU32(a + 0 /* type */) != type) - continue; - /* If @name is present, compare the two names. 
*/ - if (name && !ntfs_are_names_equal(name, name_len, (wchar_t*) - (a + NTFS_GETU16(a + 10 /* name_offset */)), - a[9] /* name_length */, ic, vol->upcase, - vol->upcase_length)) { - register int rc; - - rc = ntfs_collate_names(vol->upcase, vol->upcase_length, - name, name_len, (wchar_t*)(a + - NTFS_GETU16(a + 10 /* name_offset */)), - a[9] /* name_length */, 1, 1); - /* - * If @name collates before a->name, there is no - * matching attribute. - */ - if (rc == -1) - return NULL; - /* If the strings are not equal, continue search. */ - if (rc) - continue; - rc = ntfs_collate_names(vol->upcase, vol->upcase_length, - name, name_len, (wchar_t*)(a + - NTFS_GETU16(a + 10 /* name_offset */)), - a[9] /* name_length */, 0, 1); - if (rc == -1) - return NULL; - if (rc) - continue; - } - /* - * The names match or @name not present. Check instance number. - * and if it matches we have found the attribute and are done. - */ - if (instance != NTFS_GETU16(a + 14 /* instance */)) - continue; - ntfs_debug(DEBUG_FILE3, "ntfs_find_attr_in_mft_record: found: " - "attr type 0x%x, instance number = 0x%x.\n", - NTFS_GETU32(a + 0), instance); - return a; - } - ntfs_error("ntfs_find_attr_in_mft_record: mft record 0x%x is corrupt" - ". Run chkdsk.\n", m); - return NULL; -} - -/* Look if an attribute already exists in the inode, and if not, create it. */ -int ntfs_new_attr(ntfs_inode *ino, int type, void *name, int namelen, - void *value, int value_len, int *pos, int *found) -{ - int do_insert = 0; - int i, m; - ntfs_attribute *a; - - for (i = 0; i < ino->attr_count; i++) - { - a = ino->attrs + i; - if (a->type < type) - continue; - if (a->type > type) { - do_insert = 1; - break; - } - /* If @name is present, compare the two names. 
*/ - if (namelen && !ntfs_are_names_equal((wchar_t*)name, namelen, - a->name, a->namelen /* name_length */, - 1 /* ignore case*/, ino->vol->upcase, - ino->vol->upcase_length)) { - register int rc; - - rc = ntfs_collate_names(ino->vol->upcase, - ino->vol->upcase_length, a->name, - a->namelen, (wchar_t*)name, namelen, - 1 /* ignore case */, 1); - if (rc == -1) - continue; - if (rc == 1) { - do_insert = 1; - break; - } - rc = ntfs_collate_names(ino->vol->upcase, - ino->vol->upcase_length, a->name, - a->namelen, (wchar_t*)name, namelen, - 0 /* case sensitive */, 1); - if (rc == -1) - continue; - if (rc == 1) { - do_insert = 1; - break; - } - } - /* Names are equal or no name was asked for. */ - /* If a value was specified compare the values. */ - if (value_len && a->resident) { - if (!a->resident) { - ntfs_error("ntfs_new_attr: Value specified but " - "attribute non-resident. Bug!\n"); - return -EINVAL; - } - m = value_len; - if (m > a->size) - m = a->size; - m = memcmp(value, a->d.data, m); - if (m > 0) - continue; - if (m < 0) { - do_insert = 1; - break; - } - /* Values match until min of value lengths. */ - if (value_len > a->size) - continue; - if (value_len < a->size) { - do_insert = 1; - break; - } - } - /* Full match! */ - *found = 1; - *pos = i; - return 0; - } - /* Re-allocate space. 
*/ - if (ino->attr_count % 8 == 0) - { - ntfs_attribute* new; - new = (ntfs_attribute*)ntfs_malloc((ino->attr_count + 8) * - sizeof(ntfs_attribute)); - if (!new) - return -ENOMEM; - if (ino->attrs) { - ntfs_memcpy(new, ino->attrs, ino->attr_count * - sizeof(ntfs_attribute)); - ntfs_free(ino->attrs); - } - ino->attrs = new; - } - if (do_insert) - ntfs_memmove(ino->attrs + i + 1, ino->attrs + i, - (ino->attr_count - i) * sizeof(ntfs_attribute)); - ino->attr_count++; - ino->attrs[i].type = type; - ino->attrs[i].namelen = namelen; - ino->attrs[i].name = name; - *pos = i; - *found = 0; - return 0; -} - -int ntfs_make_attr_resident(ntfs_inode *ino, ntfs_attribute *attr) -{ - __s64 size = attr->size; - if (size > 0) { - /* FIXME: read data, free clusters */ - return -EOPNOTSUPP; - } - attr->resident = 1; - return 0; -} - -/* Store in the inode readable information about a run. */ -int ntfs_insert_run(ntfs_attribute *attr, int cnum, ntfs_cluster_t cluster, - int len) -{ - /* (re-)allocate space if necessary. */ - if ((attr->d.r.len * sizeof(ntfs_runlist)) % PAGE_SIZE == 0) { - ntfs_runlist* new; - unsigned long new_size; - - ntfs_debug(DEBUG_MALLOC, "ntfs_insert_run: re-allocating " - "space: old attr->d.r.len = 0x%x\n", - attr->d.r.len); - new_size = attr->d.r.len * sizeof(ntfs_runlist) + PAGE_SIZE; - if ((new_size >> PAGE_SHIFT) > num_physpages) { - ntfs_error("ntfs_insert_run: attempted to allocate " - "more pages than num_physpages." 
- "This might be a bug or a corrupt" - "file system.\n"); - return -1; - } - new = ntfs_vmalloc(new_size); - if (!new) { - ntfs_error("ntfs_insert_run: ntfs_vmalloc(new_size = " - "0x%x) failed\n", new_size); - return -1; - } - if (attr->d.r.runlist) { - ntfs_memcpy(new, attr->d.r.runlist, attr->d.r.len - * sizeof(ntfs_runlist)); - ntfs_vfree(attr->d.r.runlist); - } - attr->d.r.runlist = new; - } - if (attr->d.r.len > cnum) - ntfs_memmove(attr->d.r.runlist + cnum + 1, - attr->d.r.runlist + cnum, - (attr->d.r.len - cnum) * sizeof(ntfs_runlist)); - attr->d.r.runlist[cnum].lcn = cluster; - attr->d.r.runlist[cnum].len = len; - attr->d.r.len++; - return 0; -} - -/** - * ntfs_extend_attr - extend allocated size of an attribute - * @ino: ntfs inode containing the attribute to extend - * @attr: attribute which to extend - * @len: desired new length for @attr (_not_ the amount to extend by) - * - * Extends an attribute. Allocate clusters on the volume which @ino belongs to. - * Extends the run list accordingly, preferably by extending the last run of - * the existing run list, first. - * - * Only modifies attr->allocated, i.e. doesn't touch attr->size, nor - * attr->initialized. - */ -int ntfs_extend_attr(ntfs_inode *ino, ntfs_attribute *attr, const __s64 len) -{ - int rlen, rl2_len, err = 0; - ntfs_cluster_t cluster, clen; - ntfs_runlist *rl, *rl2; - - if ((attr->flags & (ATTR_IS_COMPRESSED | ATTR_IS_ENCRYPTED)) || - ino->record_count > 1) - return -EOPNOTSUPP; - /* - * FIXME: Don't make non-resident if the attribute type is not right. - * For example cannot make index attribute non-resident! (AIA) - */ - if (attr->resident) { - err = ntfs_make_attr_nonresident(ino, attr); - if (err) - return err; - } - if (len <= attr->allocated) - return 0; /* Truly stupid things do sometimes happen. */ - rl = attr->d.r.runlist; - rlen = attr->d.r.len; - if (rlen > 0) - cluster = rl[rlen - 1].lcn + rl[rlen - 1].len; - else - /* No preference for allocation space. 
*/ - cluster = (ntfs_cluster_t)-1; - /* - * Calculate the extra space we need, and round up to multiple of - * cluster size to get number of new clusters needed. - */ - clen = (len - attr->allocated + ino->vol->cluster_size - 1) >> - ino->vol->cluster_size_bits; - if (!clen) - return 0; - err = ntfs_allocate_clusters(ino->vol, &cluster, &clen, &rl2, - &rl2_len, DATA_ZONE); - if (err) - return err; - attr->allocated += (__s64)clen << ino->vol->cluster_size_bits; - if (rlen > 0) { - err = splice_runlists(&rl, &rlen, rl2, rl2_len); - ntfs_vfree(rl2); - if (err) - return err; - } else { - if (rl) - ntfs_vfree(rl); - rl = rl2; - rlen = rl2_len; - } - attr->d.r.runlist = rl; - attr->d.r.len = rlen; - return 0; -} - -int ntfs_make_attr_nonresident(ntfs_inode *ino, ntfs_attribute *attr) -{ - int error; - ntfs_io io; - void *data = attr->d.data; - __s64 len = attr->size; - - attr->d.r.len = 0; - attr->d.r.runlist = NULL; - attr->resident = 0; - /* - * ->allocated is updated by ntfs_extend_attr(), while ->initialized - * and ->size are updated by ntfs_readwrite_attr(). (AIA) - */ - attr->allocated = attr->initialized = 0; - error = ntfs_extend_attr(ino, attr, len); - if (error) - return error; /* FIXME: On error, restore old values. */ - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - io.param = data; - io.size = len; - io.do_read = 0; - return ntfs_readwrite_attr(ino, attr, 0, &io); -} - -int ntfs_attr_allnonresident(ntfs_inode *ino) -{ - int i, error = 0; - ntfs_volume *vol = ino->vol; - - for (i = 0; !error && i < ino->attr_count; i++) - { - if (ino->attrs[i].type != vol->at_security_descriptor && - ino->attrs[i].type != vol->at_data) - continue; - error = ntfs_make_attr_nonresident(ino, ino->attrs + i); - } - return error; -} - -/* - * Resize the attribute to a newsize. attr->allocated and attr->size are - * updated, but attr->initialized is not changed unless it becomes bigger than - * attr->size, in which case it is set to attr->size. 
- */ -int ntfs_resize_attr(ntfs_inode *ino, ntfs_attribute *attr, __s64 newsize) -{ - int error = 0; - __s64 oldsize = attr->size; - int clustersizebits = ino->vol->cluster_size_bits; - int i, count, newcount; - ntfs_runlist *rl, *rlt; - - if (newsize == oldsize) - return 0; - if (attr->flags & (ATTR_IS_COMPRESSED | ATTR_IS_ENCRYPTED)) - return -EOPNOTSUPP; - if (attr->resident) { - void *v; - if (newsize > ino->vol->mft_record_size) { - error = ntfs_make_attr_nonresident(ino, attr); - if (error) - return error; - return ntfs_resize_attr(ino, attr, newsize); - } - v = attr->d.data; - if (newsize) { - __s64 minsize = newsize; - attr->d.data = ntfs_malloc(newsize); - if (!attr->d.data) { - ntfs_free(v); - return -ENOMEM; - } - if (newsize > oldsize) { - minsize = oldsize; - ntfs_bzero((char*)attr->d.data + oldsize, - newsize - oldsize); - } - ntfs_memcpy((char*)attr->d.data, v, minsize); - } else - attr->d.data = 0; - ntfs_free(v); - attr->size = newsize; - return 0; - } - /* Non-resident attribute. */ - rl = attr->d.r.runlist; - if (newsize < oldsize) { - int rl_size; - /* - * FIXME: We might be going awfully wrong for newsize = 0, - * possibly even leaking memory really badly. But considering - * in that case there is more breakage due to -EOPNOTSUPP stuff - * further down the code path, who cares for the moment... (AIA) - */ - for (i = 0, count = 0; i < attr->d.r.len; i++) { - if ((__s64)(count + rl[i].len) << clustersizebits > - newsize) { - i++; - break; - } - count += (int)rl[i].len; - } - newcount = count; - /* Free unused clusters in current run, unless sparse. */ - if (rl[--i].lcn != (ntfs_cluster_t)-1) { - ntfs_cluster_t rounded = newsize - ((__s64)count << - clustersizebits); - rounded = (rounded + ino->vol->cluster_size - 1) >> - clustersizebits; - error = ntfs_deallocate_cluster_run(ino->vol, - rl[i].lcn + rounded, - rl[i].len - rounded); - if (error) - return error; /* FIXME: Incomplete operation. 
*/ - rl[i].len = rounded; - newcount = count + rounded; - } - /* Free all other runs. */ - i++; - error = ntfs_deallocate_clusters(ino->vol, rl + i, - attr->d.r.len - i); - if (error) - return error; /* FIXME: Incomplete operation. */ - /* - * Free space for extra runs in memory if enough memory left - * to do so. FIXME: Only do it if it would free memory. (AIA) - */ - rl_size = ((i + 1) * sizeof(ntfs_runlist) + PAGE_SIZE - 1) & - PAGE_MASK; - if (rl_size < ((attr->d.r.len * sizeof(ntfs_runlist) + - PAGE_SIZE - 1) & PAGE_MASK)) { - rlt = ntfs_vmalloc(rl_size); - if (rlt) { - ntfs_memcpy(rlt, rl, i * sizeof(ntfs_runlist)); - ntfs_vfree(rl); - attr->d.r.runlist = rl = rlt; - } - } - rl[i].lcn = (ntfs_cluster_t)-1; - rl[i].len = (ntfs_cluster_t)0; - attr->d.r.len = i; - } else { - error = ntfs_extend_attr(ino, attr, newsize); - if (error) - return error; /* FIXME: Incomplete operation. */ - newcount = (newsize + ino->vol->cluster_size - 1) >> - clustersizebits; - } - /* Fill in new sizes. */ - attr->allocated = (__s64)newcount << clustersizebits; - attr->size = newsize; - if (attr->initialized > newsize) - attr->initialized = newsize; - if (!newsize) - error = ntfs_make_attr_resident(ino, attr); - return error; -} - -int ntfs_create_attr(ntfs_inode *ino, int anum, char *aname, void *data, - int dsize, ntfs_attribute **rattr) -{ - void *name; - int namelen; - int found, i; - int error; - ntfs_attribute *attr; - - if (dsize > ino->vol->mft_record_size) - /* FIXME: Non-resident attributes. */ - return -EOPNOTSUPP; - if (aname) { - namelen = strlen(aname); - name = ntfs_malloc(2 * namelen); - if (!name) - return -ENOMEM; - ntfs_ascii2uni(name, aname, namelen); - } else { - name = 0; - namelen = 0; - } - error = ntfs_new_attr(ino, anum, name, namelen, data, dsize, &i, - &found); - if (error || found) { - ntfs_free(name); - return error ? error : -EEXIST; - } - *rattr = attr = ino->attrs + i; - /* Allocate a new number. - * FIXME: Should this happen on inode writeback? 
- * FIXME: Extension records not supported. */ - error = ntfs_allocate_attr_number(ino, &i); - if (error) - return error; - attr->attrno = i; - if (attr->attrno + 1 != NTFS_GETU16(ino->attr + 0x28)) - ntfs_error("UH OH! attr->attrno (%i) != NTFS_GETU16(ino->attr " - "+ 0x28) (%i)\n", attr->attrno, - NTFS_GETU16(ino->attr + 0x28)); - attr->resident = 1; - attr->flags = 0; - attr->cengine = 0; - attr->size = attr->allocated = attr->initialized = dsize; - - /* FIXME: INDEXED information should come from $AttrDef - * Currently, only file names are indexed. As of NTFS v3.0 (Win2k), - * this is no longer true. Different attributes can be indexed now. */ - if (anum == ino->vol->at_file_name) - attr->indexed = 1; - else - attr->indexed = 0; - attr->d.data = ntfs_malloc(dsize); - if (!attr->d.data) - return -ENOMEM; - ntfs_memcpy(attr->d.data, data, dsize); - return 0; -} - -/* - * Non-resident attributes are stored in runs (intervals of clusters). - * - * This function stores in the inode readable information about a non-resident - * attribute. - */ -static int ntfs_process_runs(ntfs_inode *ino, ntfs_attribute* attr, - unsigned char *data) -{ - int startvcn, endvcn; - int vcn, cnum; - ntfs_cluster_t cluster; - int len, ctype; - int er = 0; - startvcn = NTFS_GETS64(data + 0x10); - endvcn = NTFS_GETS64(data + 0x18); - - /* Check whether this chunk really belongs to the end. Problem with - * this: this functions can get called on the last extent first, before - * it is called on the other extents in sequence. This happens when the - * base mft record contains the last extent instead of the first one - * and the first extent is stored, like any intermediate extents in - * extension mft records. This would be difficult to allow the way the - * runlist is stored in memory. Thus we fix elsewhere by causing the - * attribute list attribute to be processed immediately when found. The - * extents will then be processed starting with the first one. 
*/ - for (cnum = 0, vcn = 0; cnum < attr->d.r.len; cnum++) - vcn += attr->d.r.runlist[cnum].len; - if (vcn != startvcn) { - ntfs_debug(DEBUG_FILE3, "ntfs_process_runs: ino = 0x%x, " - "attr->type = 0x%x, startvcn = 0x%x, endvcn = 0x%x, " - "vcn = 0x%x, cnum = 0x%x\n", ino->i_number, attr->type, - startvcn, endvcn, vcn, cnum); - if (vcn < startvcn) { - ntfs_error("Problem with runlist in extended record\n"); - return -1; - } - /* Tried to insert an already inserted runlist. */ - return 0; - } - if (!endvcn) { - if (!startvcn) { - /* Allocated length. */ - endvcn = NTFS_GETS64(data + 0x28) - 1; - endvcn >>= ino->vol->cluster_size_bits; - } else { - /* This is an extent. Allocated length is not defined! - * Extents must have an endvcn though so this is an - * error. */ - ntfs_error("Corrupt attribute extent. (endvcn is " - "missing)\n"); - return -1; - } - } - data = data + NTFS_GETU16(data + 0x20); - cnum = attr->d.r.len; - cluster = 0; - for (vcn = startvcn; vcn <= endvcn; vcn += len) { - if (ntfs_decompress_run(&data, &len, &cluster, &ctype)) { - ntfs_debug(DEBUG_FILE3, "ntfs_process_runs: " - "ntfs_decompress_run failed. i_number = 0x%x\n", - ino->i_number); - return -1; - } - if (ctype) - er = ntfs_insert_run(attr, cnum, -1, len); - else - er = ntfs_insert_run(attr, cnum, cluster, len); - if (er) - break; - cnum++; - } - if (er) - ntfs_error("ntfs_process_runs: ntfs_insert_run failed\n"); - ntfs_debug(DEBUG_FILE3, "ntfs_process_runs: startvcn = 0x%x, vcn = 0x%x" - ", endvcn = 0x%x, cnum = %i\n", startvcn, vcn, - endvcn, cnum); - return er; -} - -/* Insert the attribute starting at attr in the inode ino. 
*/ -int ntfs_insert_attribute(ntfs_inode *ino, unsigned char *attrdata) -{ - int i, found; - int type; - short int *name; - int namelen; - void *data; - ntfs_attribute *attr; - int error; - - type = NTFS_GETU32(attrdata); - namelen = NTFS_GETU8(attrdata + 9); - ntfs_debug(DEBUG_FILE3, "ntfs_insert_attribute: ino->i_number 0x%x, " - "attr type 0x%x\n", ino->i_number, type); - /* Read the attribute's name if it has one. */ - if (!namelen) - name = 0; - else { - /* 1 Unicode character fits in 2 bytes. */ - name = ntfs_malloc(2 * namelen); - if (!name) - return -ENOMEM; - ntfs_memcpy(name, attrdata + NTFS_GETU16(attrdata + 10), - 2 * namelen); - } - /* If resident look for value, too. */ - if (NTFS_GETU8(attrdata + 8) == 0) - error = ntfs_new_attr(ino, type, name, namelen, - attrdata + NTFS_GETU16(attrdata + 0x14), - NTFS_GETU16(attrdata + 0x10), &i, &found); - else - error = ntfs_new_attr(ino, type, name, namelen, NULL, 0, &i, - &found); - if (error) { - ntfs_debug(DEBUG_FILE3, "ntfs_insert_attribute: ntfs_new_attr " - "failed.\n"); - if (name) - ntfs_free(name); - return error; - } - if (found) { - /* It's already there, if not resident just process the runs. */ - if (!ino->attrs[i].resident) { - ntfs_debug(DEBUG_FILE3, "ntfs_insert_attribute:" - " processing runs 1.\n"); - /* FIXME: Check error code! 
(AIA) */ - ntfs_process_runs(ino, ino->attrs + i, attrdata); - } - return 0; - } - attr = ino->attrs + i; - attr->resident = NTFS_GETU8(attrdata + 8) == 0; - attr->flags = *(__u16*)(attrdata + 0xC); - attr->attrno = NTFS_GETU16(attrdata + 0xE); - - if (attr->resident) { - attr->size = NTFS_GETU16(attrdata + 0x10); - data = attrdata + NTFS_GETU16(attrdata + 0x14); - attr->d.data = (void*)ntfs_malloc(attr->size); - if (!attr->d.data) - return -ENOMEM; - ntfs_memcpy(attr->d.data, data, attr->size); - attr->indexed = NTFS_GETU8(attrdata + 0x16); - } else { - attr->allocated = NTFS_GETS64(attrdata + 0x28); - attr->size = NTFS_GETS64(attrdata + 0x30); - attr->initialized = NTFS_GETS64(attrdata + 0x38); - attr->cengine = NTFS_GETU16(attrdata + 0x22); - if (attr->flags & ATTR_IS_COMPRESSED) - attr->compsize = NTFS_GETS64(attrdata + 0x40); - ntfs_debug(DEBUG_FILE3, "ntfs_insert_attribute: " - "attr->allocated = 0x%Lx, attr->size = 0x%Lx, " - "attr->initialized = 0x%Lx\n", attr->allocated, - attr->size, attr->initialized); - ino->attrs[i].d.r.runlist = 0; - ino->attrs[i].d.r.len = 0; - ntfs_debug(DEBUG_FILE3, "ntfs_insert_attribute: processing " - "runs 2.\n"); - /* FIXME: Check error code! (AIA) */ - ntfs_process_runs(ino, attr, attrdata); - } - return 0; -} - -int ntfs_read_zero(ntfs_io *dest, int size) -{ - int i; - char *sparse = ntfs_calloc(512); - if (!sparse) - return -ENOMEM; - i = 512; - while (size) { - if (i > size) - i = size; - dest->fn_put(dest, sparse, i); - size -= i; - } - ntfs_free(sparse); - return 0; -} - -/* Process compressed attributes. 
*/ -int ntfs_read_compressed(ntfs_inode *ino, ntfs_attribute *attr, __s64 offset, - ntfs_io *dest) -{ - int error = 0; - int clustersizebits; - int s_vcn, rnum, vcn, got, l1; - __s64 copied, len, chunk, offs1, l, chunk2; - ntfs_cluster_t cluster, cl1; - char *comp = 0, *comp1; - char *decomp = 0; - ntfs_io io; - ntfs_runlist *rl; - - l = dest->size; - clustersizebits = ino->vol->cluster_size_bits; - /* Starting cluster of potential chunk. There are three situations: - a) In a large uncompressible or sparse chunk, s_vcn is in the middle - of a run. - b) s_vcn is right on a run border. - c) When several runs make a chunk, s_vcn is before the chunks. */ - s_vcn = offset >> clustersizebits; - /* Round down to multiple of 16. */ - s_vcn &= ~15; - rl = attr->d.r.runlist; - for (rnum = vcn = 0; rnum < attr->d.r.len && vcn + rl->len <= s_vcn; - rnum++, rl++) - vcn += rl->len; - if (rnum == attr->d.r.len) { - /* Beyond end of file. */ - /* FIXME: Check allocated / initialized. */ - dest->size = 0; - return 0; - } - io.do_read = 1; - io.fn_put = ntfs_put; - io.fn_get = 0; - cluster = rl->lcn; - len = rl->len; - copied = 0; - while (l) { - chunk = 0; - if (cluster == (ntfs_cluster_t)-1) { - /* Sparse cluster. */ - __s64 ll; - - if ((len - (s_vcn - vcn)) & 15) - ntfs_error("Unexpected sparse chunk size."); - ll = ((__s64)(vcn + len) << clustersizebits) - offset; - if (ll > l) - ll = l; - chunk = ll; - error = ntfs_read_zero(dest, ll); - if (error) - goto out; - } else if (dest->do_read) { - if (!comp) { - comp = ntfs_malloc(16 << clustersizebits); - if (!comp) { - error = -ENOMEM; - goto out; - } - } - got = 0; - /* We might need to start in the middle of a run. 
*/ - cl1 = cluster + s_vcn - vcn; - comp1 = comp; - do { - int delta; - - io.param = comp1; - delta = s_vcn - vcn; - if (delta < 0) - delta = 0; - l1 = len - delta; - if (l1 > 16 - got) - l1 = 16 - got; - io.size = (__s64)l1 << clustersizebits; - error = ntfs_getput_clusters(ino->vol, cl1, 0, - &io); - if (error) - goto out; - if (l1 + delta == len) { - rnum++; - rl++; - vcn += len; - cluster = cl1 = rl->lcn; - len = rl->len; - } - got += l1; - comp1 += (__s64)l1 << clustersizebits; - } while (cluster != (ntfs_cluster_t)-1 && got < 16); - /* Until empty run. */ - chunk = 16 << clustersizebits; - if (cluster != (ntfs_cluster_t)-1 || got == 16) - /* Uncompressible */ - comp1 = comp; - else { - if (!decomp) { - decomp = ntfs_malloc(16 << - clustersizebits); - if (!decomp) { - error = -ENOMEM; - goto out; - } - } - /* Make sure there are null bytes after the - * last block. */ - *(ntfs_u32*)comp1 = 0; - ntfs_decompress(decomp, comp, chunk); - comp1 = decomp; - } - offs1 = offset - ((__s64)s_vcn << clustersizebits); - chunk2 = (16 << clustersizebits) - offs1; - if (chunk2 > l) - chunk2 = l; - if (chunk > chunk2) - chunk = chunk2; - dest->fn_put(dest, comp1 + offs1, chunk); - } - l -= chunk; - copied += chunk; - offset += chunk; - s_vcn = (offset >> clustersizebits) & ~15; - if (l && offset >= ((__s64)(vcn + len) << clustersizebits)) { - rnum++; - rl++; - vcn += len; - cluster = rl->lcn; - len = rl->len; - } - } -out: - if (comp) - ntfs_free(comp); - if (decomp) - ntfs_free(decomp); - dest->size = copied; - return error; -} - -int ntfs_write_compressed(ntfs_inode *ino, ntfs_attribute *attr, __s64 offset, - ntfs_io *dest) -{ - return -EOPNOTSUPP; -} - diff -urN linux-2.4.24-vanilla/fs/ntfs/attr.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/attr.h --- linux-2.4.24-vanilla/fs/ntfs/attr.h 2001-09-08 20:24:40.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/attr.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,38 +0,0 @@ -/* - * attr.h - Header file for attr.c - * - * Copyright (C) 1997 
Régis Duchesne - * Copyright (c) 2001 Anton Altaparmakov (AIA) - */ -#include - -ntfs_u8* ntfs_find_attr_in_mft_rec(ntfs_volume *vol, ntfs_u8 *m, __u32 type, - wchar_t *name, __u32 name_len, int ic, __u16 instance); - -int ntfs_extend_attr(ntfs_inode *ino, ntfs_attribute *attr, const __s64 len); - -int ntfs_resize_attr(ntfs_inode *ino, ntfs_attribute *attr, __s64 newsize); - -int ntfs_insert_attribute(ntfs_inode *ino, unsigned char* attrdata); - -int ntfs_read_compressed(ntfs_inode *ino, ntfs_attribute *attr, __s64 offset, - ntfs_io *dest); - -int ntfs_write_compressed(ntfs_inode *ino, ntfs_attribute *attr, __s64 offset, - ntfs_io *dest); - -int ntfs_create_attr(ntfs_inode *ino, int anum, char *aname, void *data, - int dsize, ntfs_attribute **rattr); - -int ntfs_read_zero(ntfs_io *dest, int size); - -int ntfs_make_attr_nonresident(ntfs_inode *ino, ntfs_attribute *attr); - -int ntfs_attr_allnonresident(ntfs_inode *ino); - -int ntfs_new_attr(ntfs_inode *ino, int type, void *name, int namelen, - void *value, int value_len, int *pos, int *found); - -int ntfs_insert_run(ntfs_attribute *attr, int cnum, ntfs_cluster_t cluster, - int len); - diff -urN linux-2.4.24-vanilla/fs/ntfs/attrib.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/attrib.c --- linux-2.4.24-vanilla/fs/ntfs/attrib.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/attrib.c 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,1720 @@ +/** + * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "ntfs.h" +#include "dir.h" + +/* Temporary helper functions -- might become macros */ + +/** + * ntfs_rl_mm - run_list memmove + * + * It is up to the caller to serialize access to the run list @base. + */ +static inline void ntfs_rl_mm(run_list_element *base, int dst, int src, + int size) +{ + if (likely((dst != src) && (size > 0))) + memmove(base + dst, base + src, size * sizeof (*base)); +} + +/** + * ntfs_rl_mc - run_list memory copy + * + * It is up to the caller to serialize access to the run lists @dstbase and + * @srcbase. + */ +static inline void ntfs_rl_mc(run_list_element *dstbase, int dst, + run_list_element *srcbase, int src, int size) +{ + if (likely(size > 0)) + memcpy(dstbase + dst, srcbase + src, size * sizeof(*dstbase)); +} + +/** + * ntfs_rl_realloc - Reallocate memory for run_lists + * @rl: original run list + * @old_size: number of run list elements in the original run list @rl + * @new_size: number of run list elements we need space for + * + * As the run_lists grow, more memory will be required. To prevent the + * kernel having to allocate and reallocate large numbers of small bits of + * memory, this function returns an entire page of memory. + * + * It is up to the caller to serialize access to the run list @rl. + * + * N.B. If the new allocation doesn't require a different number of pages in + * memory, the function will return the original pointer.
+ * + * On success, return a pointer to the newly allocated, or recycled, memory. + * On error, return -errno. The following error codes are defined: + * -ENOMEM - Not enough memory to allocate run list array. + * -EINVAL - Invalid parameters were passed in. + */ +static inline run_list_element *ntfs_rl_realloc(run_list_element *rl, + int old_size, int new_size) +{ + run_list_element *new_rl; + + old_size = PAGE_ALIGN(old_size * sizeof(*rl)); + new_size = PAGE_ALIGN(new_size * sizeof(*rl)); + if (old_size == new_size) + return rl; + + new_rl = ntfs_malloc_nofs(new_size); + if (unlikely(!new_rl)) + return ERR_PTR(-ENOMEM); + + if (likely(rl != NULL)) { + if (unlikely(old_size > new_size)) + old_size = new_size; + memcpy(new_rl, rl, old_size); + ntfs_free(rl); + } + return new_rl; +} + +/** + * ntfs_are_rl_mergeable - test if two run lists can be joined together + * @dst: original run list + * @src: new run list to test for mergeability with @dst + * + * Test if two run lists can be joined together. For this, their VCNs and LCNs + * must be adjacent. + * + * It is up to the caller to serialize access to the run lists @dst and @src. + * + * Return: TRUE Success, the run lists can be merged. + * FALSE Failure, the run lists cannot be merged. + */ +static inline BOOL ntfs_are_rl_mergeable(run_list_element *dst, + run_list_element *src) +{ + BUG_ON(!dst); + BUG_ON(!src); + + if ((dst->lcn < 0) || (src->lcn < 0)) /* Are we merging holes? */ + return FALSE; + if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */ + return FALSE; + if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */ + return FALSE; + + return TRUE; +} + +/** + * __ntfs_rl_merge - merge two run lists without testing if they can be merged + * @dst: original, destination run list + * @src: new run list to merge with @dst + * + * Merge the two run lists, writing into the destination run list @dst. 
The + * caller must make sure the run lists can be merged or this will corrupt the + * destination run list. + * + * It is up to the caller to serialize access to the run lists @dst and @src. + */ +static inline void __ntfs_rl_merge(run_list_element *dst, run_list_element *src) +{ + dst->length += src->length; +} + +/** + * ntfs_rl_merge - test if two run lists can be joined together and merge them + * @dst: original, destination run list + * @src: new run list to merge with @dst + * + * Test if two run lists can be joined together. For this, their VCNs and LCNs + * must be adjacent. If they can be merged, perform the merge, writing into + * the destination run list @dst. + * + * It is up to the caller to serialize access to the run lists @dst and @src. + * + * Return: TRUE Success, the run lists have been merged. + * FALSE Failure, the run lists cannot be merged and have not been + * modified. + */ +static inline BOOL ntfs_rl_merge(run_list_element *dst, run_list_element *src) +{ + BOOL merge = ntfs_are_rl_mergeable(dst, src); + + if (merge) + __ntfs_rl_merge(dst, src); + return merge; +} + +/** + * ntfs_rl_append - append a run list after a given element + * @dst: original run list to be worked on + * @dsize: number of elements in @dst (including end marker) + * @src: run list to be inserted into @dst + * @ssize: number of elements in @src (excluding end marker) + * @loc: append the new run list @src after this element in @dst + * + * Append the run list @src after element @loc in @dst. Merge the right end of + * the new run list, if necessary. Adjust the size of the hole before the + * appended run list. + * + * It is up to the caller to serialize access to the run lists @dst and @src. + * + * On success, return a pointer to the new, combined, run list. Note, both + * run lists @dst and @src are deallocated before returning so you cannot use + * the pointers for anything any more. 
(Strictly speaking the returned run list
+ * may be the same as @dst but this is irrelevant.)
+ *
+ * On error, return -errno. Both run lists are left unmodified. The following
+ * error codes are defined:
+ * -ENOMEM - Not enough memory to allocate run list array.
+ * -EINVAL - Invalid parameters were passed in.
+ */
+static inline run_list_element *ntfs_rl_append(run_list_element *dst,
+		int dsize, run_list_element *src, int ssize, int loc)
+{
+	BOOL right;
+	int magic;
+
+	BUG_ON(!dst);
+	BUG_ON(!src);
+
+	/*
+	 * First, check if the right hand end needs merging, i.e. whether the
+	 * @dst element following @loc directly continues the last @src run.
+	 */
+	right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+
+	/* Space required: @dst size + @src size, less one if we merged. */
+	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right);
+	if (IS_ERR(dst))
+		return dst;
+	/*
+	 * We are guaranteed to succeed from here so can start modifying the
+	 * original run lists.
+	 */
+
+	/*
+	 * First, merge the right hand end, if necessary.  Note the argument
+	 * order: the length of dst[loc + 1] is added to the last @src run, so
+	 * that @dst element is skipped when the tail is moved below.
+	 */
+	if (right)
+		__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
+
+	magic = loc + ssize;
+
+	/*
+	 * Move the tail of @dst out of the way, then copy in @src.  @src is
+	 * placed at indices loc + 1 up to and including @magic, so the moved
+	 * tail starts at @magic + 1.  (ntfs_rl_mm() and ntfs_rl_mc() are
+	 * defined outside this view; presumably they move/copy the given
+	 * number of elements - confirm against their definitions.)
+	 */
+	ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right);
+	ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
+
+	/*
+	 * Adjust the size of the preceding hole so that it ends where the
+	 * first appended run begins.
+	 */
+	dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
+
+	/* We may have changed the length of the file, so fix the end marker */
+	if (dst[magic + 1].lcn == LCN_ENOENT)
+		dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length;
+
+	return dst;
+}
+
+/**
+ * ntfs_rl_insert - insert a run list into another
+ * @dst: original run list to be worked on
+ * @dsize: number of elements in @dst (including end marker)
+ * @src: new run list to be inserted
+ * @ssize: number of elements in @src (excluding end marker)
+ * @loc: insert the new run list @src before this element in @dst
+ *
+ * Insert the run list @src before element @loc in the run list @dst. Merge the
+ * left end of the new run list, if necessary.
Adjust the size of the hole
+ * after the inserted run list.
+ *
+ * It is up to the caller to serialize access to the run lists @dst and @src.
+ *
+ * On success, return a pointer to the new, combined, run list. Note, both
+ * run lists @dst and @src are deallocated before returning so you cannot use
+ * the pointers for anything any more. (Strictly speaking the returned run list
+ * may be the same as @dst but this is irrelevant.)
+ *
+ * On error, return -errno. Both run lists are left unmodified. The following
+ * error codes are defined:
+ * -ENOMEM - Not enough memory to allocate run list array.
+ * -EINVAL - Invalid parameters were passed in.
+ */
+static inline run_list_element *ntfs_rl_insert(run_list_element *dst,
+		int dsize, run_list_element *src, int ssize, int loc)
+{
+	BOOL left = FALSE;	/* First @src run merges into dst[loc - 1] */
+	BOOL disc = FALSE;	/* Discontinuity */
+	BOOL hole = FALSE;	/* Following a hole */
+	int magic;
+
+	BUG_ON(!dst);
+	BUG_ON(!src);
+
+	/* disc => Discontinuity between the end of @dst and the start of @src.
+	 *	   This means we might need to insert a hole.
+	 * hole => @dst ends with a hole or an unmapped region which we can
+	 *	   extend to match the discontinuity.  Note that the code below
+	 *	   only tests the element before @loc for LCN_HOLE. */
+	if (loc == 0)
+		disc = (src[0].vcn > 0);
+	else {
+		s64 merged_length;
+
+		left = ntfs_are_rl_mergeable(dst + loc - 1, src);
+
+		merged_length = dst[loc - 1].length;
+		if (left)
+			merged_length += src->length;
+
+		disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
+		if (disc)
+			hole = (dst[loc - 1].lcn == LCN_HOLE);
+	}
+
+	/* Space required: @dst size + @src size, less one if we merged, plus
+	 * one if there was a discontinuity, less one for a trailing hole. */
+	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole);
+	if (IS_ERR(dst))
+		return dst;
+	/*
+	 * We are guaranteed to succeed from here so can start modifying the
+	 * original run list.
+	 */
+
+	if (left)
+		__ntfs_rl_merge(dst + loc - 1, src);
+
+	magic = loc + ssize - left + disc - hole;
+
+	/*
+	 * Move the tail of @dst out of the way, then copy in @src.  The tail
+	 * (from @loc onwards) is moved to index @magic.  If the first @src run
+	 * was merged to the left it is not copied again (hence the source
+	 * offset @left and the count @ssize - @left).
+	 */
+	ntfs_rl_mm(dst, magic, loc, dsize - loc);
+	ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left);
+
+	/*
+	 * Adjust the VCN of the last run, i.e. the first element of the moved
+	 * tail, if it is a hole or another special (negative lcn) element ...
+	 */
+	if (dst[magic].lcn <= LCN_HOLE)
+		dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+	/* ... and the length. */
+	if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED)
+		dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn;
+
+	/*
+	 * Writing beyond the end of the file and there's a discontinuity.
+	 * Either stretch the hole preceding @loc so that it reaches the
+	 * inserted runs, or turn dst[loc] into a new LCN_RL_NOT_MAPPED element
+	 * covering the gap.
+	 */
+	if (disc) {
+		if (hole)
+			dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn;
+		else {
+			if (loc > 0) {
+				dst[loc].vcn = dst[loc - 1].vcn +
+						dst[loc - 1].length;
+				dst[loc].length = dst[loc + 1].vcn -
+						dst[loc].vcn;
+			} else {
+				dst[loc].vcn = 0;
+				dst[loc].length = dst[loc + 1].vcn;
+			}
+			dst[loc].lcn = LCN_RL_NOT_MAPPED;
+		}
+
+		magic += hole;
+
+		if (dst[magic].lcn == LCN_ENOENT)
+			dst[magic].vcn = dst[magic - 1].vcn +
+					dst[magic - 1].length;
+	}
+	return dst;
+}
+
+/**
+ * ntfs_rl_replace - overwrite a run_list element with another run list
+ * @dst: original run list to be worked on
+ * @dsize: number of elements in @dst (including end marker)
+ * @src: new run list to be inserted
+ * @ssize: number of elements in @src (excluding end marker)
+ * @loc: index in run list @dst to overwrite with @src
+ *
+ * Replace the run list element @dst at @loc with @src. Merge the left and
+ * right ends of the inserted run list, if necessary.
+ *
+ * It is up to the caller to serialize access to the run lists @dst and @src.
+ *
+ * On success, return a pointer to the new, combined, run list. Note, both
+ * run lists @dst and @src are deallocated before returning so you cannot use
+ * the pointers for anything any more. (Strictly speaking the returned run list
+ * may be the same as @dst but this is irrelevant.)
+ *
+ * On error, return -errno. Both run lists are left unmodified. The following
+ * error codes are defined:
+ * -ENOMEM - Not enough memory to allocate run list array.
+ * -EINVAL - Invalid parameters were passed in.
+ */
+static inline run_list_element *ntfs_rl_replace(run_list_element *dst,
+		int dsize, run_list_element *src, int ssize, int loc)
+{
+	BOOL left = FALSE;
+	BOOL right;
+	int magic;
+
+	BUG_ON(!dst);
+	BUG_ON(!src);
+
+	/*
+	 * First, check whether the left and right ends need merging.  Nothing
+	 * is modified yet; the actual merges happen after the reallocation.
+	 * There is no left neighbour to test when @loc is 0.
+	 */
+	right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+	if (loc > 0)
+		left = ntfs_are_rl_mergeable(dst + loc - 1, src);
+
+	/* Allocate some space. We'll need less if the left, right, or both
+	 * ends were merged. */
+	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right);
+	if (IS_ERR(dst))
+		return dst;
+	/*
+	 * We are guaranteed to succeed from here so can start modifying the
+	 * original run lists.  The right merge adds dst[loc + 1] to the last
+	 * @src run; the left merge adds the first @src run to dst[loc - 1].
+	 */
+	if (right)
+		__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
+	if (left)
+		__ntfs_rl_merge(dst + loc - 1, src);
+
+	/*
+	 * @magic is the index just past the copied @src runs: @ssize - @left
+	 * runs are copied in starting at @loc below, so the surviving tail of
+	 * @dst is moved to index @magic.
+	 * (Original note: FIXME: What does this mean? (AIA))
+	 */
+	magic = loc + ssize - left;
+
+	/*
+	 * Move the tail of @dst out of the way, then copy in @src.  The tail
+	 * starts after the replaced element, and after its right neighbour
+	 * too if that one was merged into @src.
+	 */
+	ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1);
+	ntfs_rl_mc(dst, loc, src, left, ssize - left);
+
+	/* We may have changed the length of the file, so fix the end marker */
+	if (dst[magic].lcn == LCN_ENOENT)
+		dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+	return dst;
+}
+
+/**
+ * ntfs_rl_split - insert a run list into the centre of a hole
+ * @dst: original run list to be worked on
+ * @dsize: number of elements in @dst (including end marker)
+ * @src: new run list to be inserted
+ * @ssize: number of elements in @src (excluding end marker)
+ * @loc: index in run list @dst at which to split and insert @src
+ *
+ * Split the run list @dst at @loc into two and insert @new in between the two
+ * fragments. No merging of run lists is necessary.
Adjust the size of the
+ * holes either side.
+ *
+ * It is up to the caller to serialize access to the run lists @dst and @src.
+ *
+ * On success, return a pointer to the new, combined, run list. Note, both
+ * run lists @dst and @src are deallocated before returning so you cannot use
+ * the pointers for anything any more. (Strictly speaking the returned run list
+ * may be the same as @dst but this is irrelevant.)
+ *
+ * On error, return -errno. Both run lists are left unmodified. The following
+ * error codes are defined:
+ * -ENOMEM - Not enough memory to allocate run list array.
+ * -EINVAL - Invalid parameters were passed in.
+ */
+static inline run_list_element *ntfs_rl_split(run_list_element *dst, int dsize,
+		run_list_element *src, int ssize, int loc)
+{
+	BUG_ON(!dst);
+	BUG_ON(!src);
+
+	/* Space required: @dst size + @src size + one new hole. */
+	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize + 1);
+	if (IS_ERR(dst))
+		return dst;
+	/*
+	 * We are guaranteed to succeed from here so can start modifying the
+	 * original run lists.
+	 */
+
+	/*
+	 * Move the tail of @dst out of the way, then copy in @src.  The move
+	 * starts at @loc itself, so the element being split ends up both at
+	 * @loc (left fragment) and at @loc + 1 + @ssize (right fragment), with
+	 * @src copied in between.
+	 */
+	ntfs_rl_mm(dst, loc + 1 + ssize, loc, dsize - loc);
+	ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
+
+	/*
+	 * Adjust the size of the holes either side of @src: the left fragment
+	 * ends where @src starts, the right fragment starts where @src ends
+	 * and runs up to the element following it.
+	 */
+	dst[loc].length = dst[loc+1].vcn - dst[loc].vcn;
+	dst[loc+ssize+1].vcn = dst[loc+ssize].vcn + dst[loc+ssize].length;
+	dst[loc+ssize+1].length = dst[loc+ssize+2].vcn - dst[loc+ssize+1].vcn;
+
+	return dst;
+}
+
+/**
+ * ntfs_merge_run_lists - merge two run_lists into one
+ * @drl: original run list to be worked on
+ * @srl: new run list to be merged into @drl
+ *
+ * First we sanity check the two run lists @srl and @drl to make sure that they
+ * are sensible and can be merged. The run list @srl must be either after the
+ * run list @drl or completely within a hole (or unmapped region) in @drl.
+ *
+ * It is up to the caller to serialize access to the run lists @drl and @srl.
+ * + * Merging of run lists is necessary in two cases: + * 1. When attribute lists are used and a further extent is being mapped. + * 2. When new clusters are allocated to fill a hole or extend a file. + * + * There are four possible ways @srl can be merged. It can: + * - be inserted at the beginning of a hole, + * - split the hole in two and be inserted between the two fragments, + * - be appended at the end of a hole, or it can + * - replace the whole hole. + * It can also be appended to the end of the run list, which is just a variant + * of the insert case. + * + * On success, return a pointer to the new, combined, run list. Note, both + * run lists @drl and @srl are deallocated before returning so you cannot use + * the pointers for anything any more. (Strictly speaking the returned run list + * may be the same as @dst but this is irrelevant.) + * + * On error, return -errno. Both run lists are left unmodified. The following + * error codes are defined: + * -ENOMEM - Not enough memory to allocate run list array. + * -EINVAL - Invalid parameters were passed in. + * -ERANGE - The run lists overlap and cannot be merged. + */ +run_list_element *ntfs_merge_run_lists(run_list_element *drl, + run_list_element *srl) +{ + int di, si; /* Current index into @[ds]rl. */ + int sstart; /* First index with lcn > LCN_RL_NOT_MAPPED. */ + int dins; /* Index into @drl at which to insert @srl. */ + int dend, send; /* Last index into @[ds]rl. */ + int dfinal, sfinal; /* The last index into @[ds]rl with + lcn >= LCN_HOLE. */ + int marker = 0; + VCN marker_vcn = 0; + +#ifdef DEBUG + ntfs_debug("dst:"); + ntfs_debug_dump_runlist(drl); + ntfs_debug("src:"); + ntfs_debug_dump_runlist(srl); +#endif + + /* Check for silly calling... */ + if (unlikely(!srl)) + return drl; + if (unlikely(IS_ERR(srl) || IS_ERR(drl))) + return ERR_PTR(-EINVAL); + + /* Check for the case where the first mapping is being done now. 
*/ + if (unlikely(!drl)) { + drl = srl; + /* Complete the source run list if necessary. */ + if (unlikely(drl[0].vcn)) { + /* Scan to the end of the source run list. */ + for (dend = 0; likely(drl[dend].length); dend++) + ; + drl = ntfs_rl_realloc(drl, dend, dend + 1); + if (IS_ERR(drl)) + return drl; + /* Insert start element at the front of the run list. */ + ntfs_rl_mm(drl, 1, 0, dend); + drl[0].vcn = 0; + drl[0].lcn = LCN_RL_NOT_MAPPED; + drl[0].length = drl[1].vcn; + } + goto finished; + } + + si = di = 0; + + /* Skip any unmapped start element(s) in the source run_list. */ + while (srl[si].length && srl[si].lcn < (LCN)LCN_HOLE) + si++; + + /* Can't have an entirely unmapped source run list. */ + BUG_ON(!srl[si].length); + + /* Record the starting points. */ + sstart = si; + + /* + * Skip forward in @drl until we reach the position where @srl needs to + * be inserted. If we reach the end of @drl, @srl just needs to be + * appended to @drl. + */ + for (; drl[di].length; di++) { + if (drl[di].vcn + drl[di].length > srl[sstart].vcn) + break; + } + dins = di; + + /* Sanity check for illegal overlaps. */ + if ((drl[di].vcn == srl[si].vcn) && (drl[di].lcn >= 0) && + (srl[si].lcn >= 0)) { + ntfs_error(NULL, "Run lists overlap. Cannot merge!"); + return ERR_PTR(-ERANGE); + } + + /* Scan to the end of both run lists in order to know their sizes. */ + for (send = si; srl[send].length; send++) + ; + for (dend = di; drl[dend].length; dend++) + ; + + if (srl[send].lcn == (LCN)LCN_ENOENT) + marker_vcn = srl[marker = send].vcn; + + /* Scan to the last element with lcn >= LCN_HOLE. 
*/ + for (sfinal = send; sfinal >= 0 && srl[sfinal].lcn < LCN_HOLE; sfinal--) + ; + for (dfinal = dend; dfinal >= 0 && drl[dfinal].lcn < LCN_HOLE; dfinal--) + ; + + { + BOOL start; + BOOL finish; + int ds = dend + 1; /* Number of elements in drl & srl */ + int ss = sfinal - sstart + 1; + + start = ((drl[dins].lcn < LCN_RL_NOT_MAPPED) || /* End of file */ + (drl[dins].vcn == srl[sstart].vcn)); /* Start of hole */ + finish = ((drl[dins].lcn >= LCN_RL_NOT_MAPPED) && /* End of file */ + ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ + (srl[send - 1].vcn + srl[send - 1].length))); + + /* Or we'll lose an end marker */ + if (start && finish && (drl[dins].length == 0)) + ss++; + if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) + finish = FALSE; +#if 0 + ntfs_debug("dfinal = %i, dend = %i", dfinal, dend); + ntfs_debug("sstart = %i, sfinal = %i, send = %i", sstart, sfinal, send); + ntfs_debug("start = %i, finish = %i", start, finish); + ntfs_debug("ds = %i, ss = %i, dins = %i", ds, ss, dins); +#endif + if (start) { + if (finish) + drl = ntfs_rl_replace(drl, ds, srl + sstart, ss, dins); + else + drl = ntfs_rl_insert(drl, ds, srl + sstart, ss, dins); + } else { + if (finish) + drl = ntfs_rl_append(drl, ds, srl + sstart, ss, dins); + else + drl = ntfs_rl_split(drl, ds, srl + sstart, ss, dins); + } + if (IS_ERR(drl)) { + ntfs_error(NULL, "Merge failed."); + return drl; + } + ntfs_free(srl); + if (marker) { + ntfs_debug("Triggering marker code."); + for (ds = dend; drl[ds].length; ds++) + ; + /* We only need to care if @srl ended after @drl. */ + if (drl[ds].vcn <= marker_vcn) { + int slots = 0; + + if (drl[ds].vcn == marker_vcn) { + ntfs_debug("Old marker = 0x%Lx, replacing with " + "LCN_ENOENT.\n", + (unsigned long long) + drl[ds].lcn); + drl[ds].lcn = (LCN)LCN_ENOENT; + goto finished; + } + /* + * We need to create an unmapped run list element in + * @drl or extend an existing one before adding the + * ENOENT terminator. 
+ */ + if (drl[ds].lcn == (LCN)LCN_ENOENT) { + ds--; + slots = 1; + } + if (drl[ds].lcn != (LCN)LCN_RL_NOT_MAPPED) { + /* Add an unmapped run list element. */ + if (!slots) { + /* FIXME/TODO: We need to have the + * extra memory already! (AIA) */ + drl = ntfs_rl_realloc(drl, ds, ds + 2); + if (!drl) + goto critical_error; + slots = 2; + } + ds++; + /* Need to set vcn if it isn't set already. */ + if (slots != 1) + drl[ds].vcn = drl[ds - 1].vcn + + drl[ds - 1].length; + drl[ds].lcn = (LCN)LCN_RL_NOT_MAPPED; + /* We now used up a slot. */ + slots--; + } + drl[ds].length = marker_vcn - drl[ds].vcn; + /* Finally add the ENOENT terminator. */ + ds++; + if (!slots) { + /* FIXME/TODO: We need to have the extra + * memory already! (AIA) */ + drl = ntfs_rl_realloc(drl, ds, ds + 1); + if (!drl) + goto critical_error; + } + drl[ds].vcn = marker_vcn; + drl[ds].lcn = (LCN)LCN_ENOENT; + drl[ds].length = (s64)0; + } + } + } + +finished: + /* The merge was completed successfully. */ + ntfs_debug("Merged run list:"); + ntfs_debug_dump_runlist(drl); + return drl; + +critical_error: + /* Critical error! We cannot afford to fail here. */ + ntfs_error(NULL, "Critical error! Not enough memory."); + panic("NTFS: Cannot continue."); +} + +/** + * decompress_mapping_pairs - convert mapping pairs array to run list + * @vol: ntfs volume on which the attribute resides + * @attr: attribute record whose mapping pairs array to decompress + * @old_rl: optional run list in which to insert @attr's run list + * + * It is up to the caller to serialize access to the run list @old_rl. + * + * Decompress the attribute @attr's mapping pairs array into a run list. On + * success, return the decompressed run list. + * + * If @old_rl is not NULL, decompressed run list is inserted into the + * appropriate place in @old_rl and the resultant, combined run list is + * returned. The original @old_rl is deallocated. + * + * On error, return -errno. @old_rl is left unmodified in that case. 
+ *
+ * The following error codes are defined:
+ * -ENOMEM - Not enough memory to allocate run list array.
+ * -EIO - Corrupt run list.
+ * -EINVAL - Invalid parameters were passed in.
+ * -ERANGE - The two run lists overlap.
+ *
+ * FIXME: For now we take the conceptionally simplest approach of creating the
+ * new run list disregarding the already existing one and then splicing the
+ * two into one, if that is possible (we check for overlap and discard the new
+ * run list if overlap present before returning ERR_PTR(-ERANGE)).
+ */
+run_list_element *decompress_mapping_pairs(const ntfs_volume *vol,
+		const ATTR_RECORD *attr, run_list_element *old_rl)
+{
+	VCN vcn;		/* Current vcn. */
+	LCN lcn;		/* Current lcn. */
+	s64 deltaxcn;		/* Change in [vl]cn. */
+	run_list_element *rl;	/* The output run list. */
+	u8 *buf;		/* Current position in mapping pairs array. */
+	u8 *attr_end;		/* End of attribute. */
+	int rlsize;		/* Size of run list buffer in bytes. */
+	u16 rlpos;		/* Current run list position in units of
+				   run_list_elements. */
+	u8 b;			/* Current byte offset in buf. */
+
+#ifdef DEBUG
+	/* Make sure attr exists and is non-resident. */
+	if (!attr || !attr->non_resident || sle64_to_cpu(
+			attr->data.non_resident.lowest_vcn) < (VCN)0) {
+		ntfs_error(vol->sb, "Invalid arguments.");
+		return ERR_PTR(-EINVAL);
+	}
+#endif
+	/* Start at vcn = lowest_vcn and lcn 0. */
+	vcn = sle64_to_cpu(attr->data.non_resident.lowest_vcn);
+	lcn = 0;
+	/* Get start of the mapping pairs array. */
+	buf = (u8*)attr + le16_to_cpu(
+			attr->data.non_resident.mapping_pairs_offset);
+	attr_end = (u8*)attr + le32_to_cpu(attr->length);
+	if (unlikely(buf < (u8*)attr || buf > attr_end)) {
+		ntfs_error(vol->sb, "Corrupt attribute.");
+		return ERR_PTR(-EIO);
+	}
+	/* Current position in run list array. */
+	rlpos = 0;
+	/* Allocate first page and set current run list size to one page. */
+	rl = ntfs_malloc_nofs(rlsize = PAGE_SIZE);
+	if (unlikely(!rl))
+		return ERR_PTR(-ENOMEM);
+	/* Insert unmapped starting element if necessary. */
+	if (vcn) {
+		rl->vcn = (VCN)0;
+		rl->lcn = (LCN)LCN_RL_NOT_MAPPED;
+		rl->length = vcn;
+		rlpos++;
+	}
+	while (buf < attr_end && *buf) {
+		/*
+		 * Allocate more memory if needed, including space for the
+		 * not-mapped and terminator elements. ntfs_malloc_nofs()
+		 * operates on whole pages only.  The buffer is grown by one
+		 * page at a time by allocating a new one and copying.
+		 */
+		if (((rlpos + 3) * sizeof(*old_rl)) > rlsize) {
+			run_list_element *rl2;
+
+			rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
+			if (unlikely(!rl2)) {
+				ntfs_free(rl);
+				return ERR_PTR(-ENOMEM);
+			}
+			memcpy(rl2, rl, rlsize);
+			ntfs_free(rl);
+			rl = rl2;
+			rlsize += PAGE_SIZE;
+		}
+		/* Enter the current vcn into the current run_list element. */
+		rl[rlpos].vcn = vcn;
+		/*
+		 * Get the change in vcn, i.e. the run length in clusters.
+		 * The low nibble of the header byte *buf is the number of
+		 * bytes holding the length; they follow the header byte and
+		 * are read from the last (most significant) one backwards.
+		 * Doing it this way ensures that we signextend negative values.
+		 * A negative run length doesn't make any sense, but hey, I
+		 * didn't make up the NTFS specs and Windows NT4 treats the run
+		 * length as a signed value so that's how it is...
+		 */
+		b = *buf & 0xf;
+		if (b) {
+			if (unlikely(buf + b > attr_end))
+				goto io_error;
+			for (deltaxcn = (s8)buf[b--]; b; b--)
+				deltaxcn = (deltaxcn << 8) + buf[b];
+		} else { /* The length entry is compulsory. */
+			ntfs_error(vol->sb, "Missing length entry in mapping "
+					"pairs array.");
+			deltaxcn = (s64)-1;
+		}
+		/*
+		 * Assume a negative length to indicate data corruption and
+		 * hence clean up and return ERR_PTR(-EIO).
+		 */
+		if (unlikely(deltaxcn < 0)) {
+			ntfs_error(vol->sb, "Invalid length in mapping pairs "
+					"array.");
+			goto err_out;
+		}
+		/*
+		 * Enter the current run length into the current run list
+		 * element.
+		 */
+		rl[rlpos].length = deltaxcn;
+		/* Increment the current vcn by the current run length. */
+		vcn += deltaxcn;
+		/*
+		 * There might be no lcn change at all, as is the case for
+		 * sparse clusters on NTFS 3.0+, in which case we set the lcn
+		 * to LCN_HOLE.  The high nibble of the header byte is the
+		 * number of bytes holding the lcn change.
+		 */
+		if (!(*buf & 0xf0))
+			rl[rlpos].lcn = (LCN)LCN_HOLE;
+		else {
+			/*
+			 * Get the lcn change which really can be negative.
+			 * Its bytes follow the @b2 length bytes and are again
+			 * read from the most significant one backwards.
+			 */
+			u8 b2 = *buf & 0xf;
+			b = b2 + ((*buf >> 4) & 0xf);
+			if (buf + b > attr_end)
+				goto io_error;
+			for (deltaxcn = (s8)buf[b--]; b > b2; b--)
+				deltaxcn = (deltaxcn << 8) + buf[b];
+			/* Change the current lcn to its new value. */
+			lcn += deltaxcn;
+#ifdef DEBUG
+			/*
+			 * On NTFS 1.2-, apparently can have lcn == -1 to
+			 * indicate a hole. But we haven't verified ourselves
+			 * whether it is really the lcn or the deltaxcn that is
+			 * -1. So if either is found give us a message so we
+			 * can investigate it further!
+			 */
+			if (vol->major_ver < 3) {
+				if (unlikely(deltaxcn == (LCN)-1))
+					ntfs_error(vol->sb, "lcn delta == -1");
+				if (unlikely(lcn == (LCN)-1))
+					ntfs_error(vol->sb, "lcn == -1");
+			}
+#endif
+			/* Check lcn is not below -1. */
+			if (unlikely(lcn < (LCN)-1)) {
+				ntfs_error(vol->sb, "Invalid LCN < -1 in "
+						"mapping pairs array.");
+				goto err_out;
+			}
+			/* Enter the current lcn into the run_list element. */
+			rl[rlpos].lcn = lcn;
+		}
+		/* Get to the next run_list element. */
+		rlpos++;
+		/*
+		 * Increment the buffer position to the next mapping pair:
+		 * skip the length bytes, the lcn bytes and the header byte.
+		 */
+		buf += (*buf & 0xf) + ((*buf >> 4) & 0xf) + 1;
+	}
+	if (unlikely(buf >= attr_end))
+		goto io_error;
+	/*
+	 * If there is a highest_vcn specified, it must be equal to the final
+	 * vcn in the run list - 1, or something has gone badly wrong.
+	 * From here on @deltaxcn holds highest_vcn rather than a delta.
+	 */
+	deltaxcn = sle64_to_cpu(attr->data.non_resident.highest_vcn);
+	if (unlikely(deltaxcn && vcn - 1 != deltaxcn)) {
+mpa_err:
+		ntfs_error(vol->sb, "Corrupt mapping pairs array in "
+				"non-resident attribute.");
+		goto err_out;
+	}
+	/* Setup not mapped run list element if this is the base extent. */
+	if (!attr->data.non_resident.lowest_vcn) {
+		VCN max_cluster;
+
+		max_cluster = (sle64_to_cpu(
+				attr->data.non_resident.allocated_size) +
+				vol->cluster_size - 1) >>
+				vol->cluster_size_bits;
+		/*
+		 * If there is a difference between the highest_vcn and the
+		 * highest cluster, the run list is either corrupt or, more
+		 * likely, there are more extents following this one.
+		 * Note that @max_cluster is decremented by the comparison, so
+		 * that it becomes the highest cluster rather than a count.
+		 */
+		if (deltaxcn < --max_cluster) {
+			ntfs_debug("More extents to follow; deltaxcn = 0x%Lx, "
+					"max_cluster = 0x%Lx",
+					(long long)deltaxcn,
+					(long long)max_cluster);
+			rl[rlpos].vcn = vcn;
+			vcn += rl[rlpos].length = max_cluster - deltaxcn;
+			rl[rlpos].lcn = (LCN)LCN_RL_NOT_MAPPED;
+			rlpos++;
+		} else if (unlikely(deltaxcn > max_cluster)) {
+			ntfs_error(vol->sb, "Corrupt attribute. deltaxcn = "
+					"0x%Lx, max_cluster = 0x%Lx",
+					(long long)deltaxcn,
+					(long long)max_cluster);
+			goto mpa_err;
+		}
+		rl[rlpos].lcn = (LCN)LCN_ENOENT;
+	} else /* Not the base extent. There may be more extents to follow. */
+		rl[rlpos].lcn = (LCN)LCN_RL_NOT_MAPPED;
+
+	/* Setup terminating run_list element. */
+	rl[rlpos].vcn = vcn;
+	rl[rlpos].length = (s64)0;
+	/* If no existing run list was specified, we are done. */
+	if (!old_rl) {
+		ntfs_debug("Mapping pairs array successfully decompressed:");
+		ntfs_debug_dump_runlist(rl);
+		return rl;
+	}
+	/*
+	 * Now combine the new and old run lists checking for overlaps.  On
+	 * failure the new run list is freed here and the error is passed on.
+	 */
+	old_rl = ntfs_merge_run_lists(old_rl, rl);
+	if (likely(!IS_ERR(old_rl)))
+		return old_rl;
+	ntfs_free(rl);
+	ntfs_error(vol->sb, "Failed to merge run lists.");
+	return old_rl;
+io_error:
+	ntfs_error(vol->sb, "Corrupt attribute.");
+err_out:
+	ntfs_free(rl);
+	return ERR_PTR(-EIO);
+}
+
+/**
+ * map_run_list - map (a part of) a run list of an ntfs inode
+ * @ni: ntfs inode for which to map (part of) a run list
+ * @vcn: map run list part containing this vcn
+ *
+ * Map the part of a run list containing the @vcn of an the ntfs inode @ni.
+ *
+ * Return 0 on success and -errno on error.
+ */
+int map_run_list(ntfs_inode *ni, VCN vcn)
+{
+	ntfs_inode *base;
+	attr_search_context *sctx;
+	MFT_RECORD *m;
+	run_list_element *merged;
+	int ret;
+
+	ntfs_debug("Mapping run list part containing vcn 0x%Lx.",
+			(long long)vcn);
+
+	/* For an attribute inode the mft record belongs to its base inode. */
+	base = NInoAttr(ni) ? ni->ext.base_ntfs_ino : ni;
+
+	m = map_mft_record(base);
+	if (IS_ERR(m))
+		return PTR_ERR(m);
+
+	sctx = get_attr_search_ctx(base, m);
+	if (!sctx) {
+		ret = -ENOMEM;
+		goto unmap;
+	}
+
+	/* Find the attribute extent that describes @vcn. */
+	ret = -ENOENT;
+	if (!lookup_attr(ni->type, ni->name, ni->name_len, IGNORE_CASE, vcn,
+			NULL, 0, sctx))
+		goto put_ctx;
+	ret = 0;
+
+	down_write(&ni->run_list.lock);
+	/* Someone else may have mapped it while we slept; if so, do nothing. */
+	if (likely(vcn_to_lcn(ni->run_list.rl, vcn) <= LCN_RL_NOT_MAPPED)) {
+		merged = decompress_mapping_pairs(ni->vol, sctx->attr,
+				ni->run_list.rl);
+		if (likely(!IS_ERR(merged)))
+			ni->run_list.rl = merged;
+		else
+			ret = PTR_ERR(merged);
+	}
+	up_write(&ni->run_list.lock);
+
+put_ctx:
+	put_attr_search_ctx(sctx);
+unmap:
+	unmap_mft_record(base);
+	return ret;
+}
+
+/**
+ * vcn_to_lcn - convert a vcn into a lcn given a run list
+ * @rl: run list to use for conversion
+ * @vcn: vcn to convert
+ *
+ * Convert the virtual cluster number @vcn of an attribute into a logical
+ * cluster number (lcn) of a device using the run list @rl to map vcns to their
+ * corresponding lcns.
+ *
+ * It is up to the caller to serialize access to the run list @rl.
+ *
+ * Since lcns must be >= 0, we use negative return values with special meaning:
+ *
+ * Return value Meaning / Description
+ * ==================================================
+ * -1 = LCN_HOLE Hole / not allocated on disk.
+ * -2 = LCN_RL_NOT_MAPPED This is part of the run list which has not been
+ * inserted into the run list yet.
+ * -3 = LCN_ENOENT There is no such vcn in the attribute.
+ * -4 = LCN_EINVAL Input parameter error (if debug enabled).
+ */ +LCN vcn_to_lcn(const run_list_element *rl, const VCN vcn) +{ + int i; + +#ifdef DEBUG + if (vcn < (VCN)0) + return (LCN)LCN_EINVAL; +#endif + /* + * If rl is NULL, assume that we have found an unmapped run list. The + * caller can then attempt to map it and fail appropriately if + * necessary. + */ + if (unlikely(!rl)) + return (LCN)LCN_RL_NOT_MAPPED; + + /* Catch out of lower bounds vcn. */ + if (unlikely(vcn < rl[0].vcn)) + return (LCN)LCN_ENOENT; + + for (i = 0; likely(rl[i].length); i++) { + if (unlikely(vcn < rl[i+1].vcn)) { + if (likely(rl[i].lcn >= (LCN)0)) + return rl[i].lcn + (vcn - rl[i].vcn); + return rl[i].lcn; + } + } + /* + * The terminator element is setup to the correct value, i.e. one of + * LCN_HOLE, LCN_RL_NOT_MAPPED, or LCN_ENOENT. + */ + if (likely(rl[i].lcn < (LCN)0)) + return rl[i].lcn; + /* Just in case... We could replace this with BUG() some day. */ + return (LCN)LCN_ENOENT; +} + +/** + * find_attr - find (next) attribute in mft record + * @type: attribute type to find + * @name: attribute name to find (optional, i.e. NULL means don't care) + * @name_len: attribute name length (only needed if @name present) + * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present) + * @val: attribute value to find (optional, resident attributes only) + * @val_len: attribute value length + * @ctx: search context with mft record and attribute to search from + * + * You shouldn't need to call this function directly. Use lookup_attr() instead. + * + * find_attr() takes a search context @ctx as parameter and searches the mft + * record specified by @ctx->mrec, beginning at @ctx->attr, for an attribute of + * @type, optionally @name and @val. If found, find_attr() returns TRUE and + * @ctx->attr will point to the found attribute. If not found, find_attr() + * returns FALSE and @ctx->attr is undefined (i.e. do not rely on it not + * changing). + * + * If @ctx->is_first is TRUE, the search begins with @ctx->attr itself. 
If it + * is FALSE, the search begins after @ctx->attr. + * + * If @ic is IGNORE_CASE, the @name comparisson is not case sensitive and + * @ctx->ntfs_ino must be set to the ntfs inode to which the mft record + * @ctx->mrec belongs. This is so we can get at the ntfs volume and hence at + * the upcase table. If @ic is CASE_SENSITIVE, the comparison is case + * sensitive. When @name is present, @name_len is the @name length in Unicode + * characters. + * + * If @name is not present (NULL), we assume that the unnamed attribute is + * being searched for. + * + * Finally, the resident attribute value @val is looked for, if present. If @val + * is not present (NULL), @val_len is ignored. + * + * find_attr() only searches the specified mft record and it ignores the + * presence of an attribute list attribute (unless it is the one being searched + * for, obviously). If you need to take attribute lists into consideration, use + * lookup_attr() instead (see below). This also means that you cannot use + * find_attr() to search for extent records of non-resident attributes, as + * extents with lowest_vcn != 0 are usually described by the attribute list + * attribute only. - Note that it is possible that the first extent is only in + * the attribute list while the last extent is in the base mft record, so don't + * rely on being able to find the first extent in the base mft record. + * + * Warning: Never use @val when looking for attribute types which can be + * non-resident as this most likely will result in a crash! 
+ */
+BOOL find_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len,
+		const IGNORE_CASE_BOOL ic, const u8 *val, const u32 val_len,
+		attr_search_context *ctx)
+{
+	ATTR_RECORD *a;
+	ntfs_volume *vol;
+	uchar_t *upcase;
+	u32 upcase_len;
+
+	if (ic == IGNORE_CASE) {
+		vol = ctx->ntfs_ino->vol;
+		upcase = vol->upcase;
+		upcase_len = vol->upcase_len;
+	} else {
+		vol = NULL;
+		upcase = NULL;
+		upcase_len = 0;
+	}
+	/*
+	 * Iterate over attributes in mft record starting at @ctx->attr, or the
+	 * attribute following that, if @ctx->is_first is FALSE.  The flag is
+	 * cleared once it has been consumed.
+	 */
+	if (ctx->is_first) {
+		a = ctx->attr;
+		ctx->is_first = FALSE;
+	} else
+		a = (ATTR_RECORD*)((u8*)ctx->attr +
+				le32_to_cpu(ctx->attr->length));
+	for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
+		if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
+				le32_to_cpu(ctx->mrec->bytes_allocated))
+			break;
+		ctx->attr = a;
+		/*
+		 * We catch $END with this more general check, too...  Any
+		 * attribute with a type greater than @type ends the search.
+		 */
+		if (le32_to_cpu(a->type) > le32_to_cpu(type))
+			return FALSE;
+		if (unlikely(!a->length))
+			break;
+		if (a->type != type)
+			continue;
+		/*
+		 * If @name is present, compare the two names. If @name is
+		 * missing, assume we want an unnamed attribute.
+		 */
+		if (!name) {
+			/* The search failed if the found attribute is named. */
+			if (a->name_length)
+				return FALSE;
+		} else if (!ntfs_are_names_equal(name, name_len,
+				(uchar_t*)((u8*)a + le16_to_cpu(a->name_offset)),
+				a->name_length, ic, upcase, upcase_len)) {
+			register int rc;
+
+			rc = ntfs_collate_names(name, name_len,
+					(uchar_t*)((u8*)a +
+					le16_to_cpu(a->name_offset)),
+					a->name_length, 1, IGNORE_CASE,
+					upcase, upcase_len);
+			/*
+			 * If @name collates before a->name, there is no
+			 * matching attribute.
+			 */
+			if (rc == -1)
+				return FALSE;
+			/*
+			 * If the strings are not equal, continue search.  If
+			 * they are equal ignoring case, the same two checks
+			 * are repeated below with a case sensitive collation.
+			 */
+			if (rc)
+				continue;
+			rc = ntfs_collate_names(name, name_len,
+					(uchar_t*)((u8*)a +
+					le16_to_cpu(a->name_offset)),
+					a->name_length, 1, CASE_SENSITIVE,
+					upcase, upcase_len);
+			if (rc == -1)
+				return FALSE;
+			if (rc)
+				continue;
+		}
+		/*
+		 * The names match or @name not present and attribute is
+		 * unnamed. If no @val specified, we have found the attribute
+		 * and are done.
+		 */
+		if (!val)
+			return TRUE;
+		/* @val is present; compare values. */
+		else {
+			u32 vl;
+			register int rc;
+
+			vl = le32_to_cpu(a->data.resident.value_length);
+			if (vl > val_len)
+				vl = val_len;
+
+			rc = memcmp(val, (u8*)a + le16_to_cpu(
+					a->data.resident.value_offset), vl);
+			/*
+			 * If @val collates before the current attribute's
+			 * value, there is no matching attribute.  When the
+			 * common prefix is identical the lengths decide: equal
+			 * lengths are a match, a shorter @val collates first,
+			 * and a longer @val means the search continues.
+			 */
+			if (!rc) {
+				register u32 avl;
+				avl = le32_to_cpu(
+						a->data.resident.value_length);
+				if (val_len == avl)
+					return TRUE;
+				if (val_len < avl)
+					return FALSE;
+			} else if (rc < 0)
+				return FALSE;
+		}
+	}
+	ntfs_error(NULL, "Inode is corrupt. Run chkdsk.");
+	return FALSE;
+}
+
+/**
+ * load_attribute_list - load an attribute list into memory
+ * @vol: ntfs volume from which to read
+ * @run_list: run list of the attribute list
+ * @al_start: destination buffer
+ * @size: size of the destination buffer in bytes
+ * @initialized_size: initialized size of the attribute list
+ *
+ * Walk the run list @run_list and load all clusters from it copying them into
+ * the linear buffer @al. The maximum number of bytes copied to @al is @size
+ * bytes. Note, @size does not need to be a multiple of the cluster size. If
+ * @initialized_size is less than @size, the region in @al between
+ * @initialized_size and @size will be zeroed and not read from disk.
+ *
+ * Return 0 on success or -errno on error.
+ */
+int load_attribute_list(ntfs_volume *vol, run_list *run_list, u8 *al_start,
+		const s64 size, const s64 initialized_size)
+{
+	LCN lcn;
+	u8 *al = al_start;
+	u8 *al_end;
+	run_list_element *rl;
+	struct buffer_head *bh;
+	struct super_block *sb;
+	unsigned long block_size;
+	unsigned long block, max_block;
+	int err = 0;
+	unsigned char block_size_bits;
+
+	ntfs_debug("Entering.");
+	/* Validate the arguments before anything is dereferenced. */
+	if (!vol || !run_list || !al || size <= 0 || initialized_size < 0 ||
+			initialized_size > size)
+		return -EINVAL;
+	/* Nothing is initialized on disk: the whole buffer is zeroes. */
+	if (!initialized_size) {
+		memset(al, 0, size);
+		return 0;
+	}
+	/* @vol is known to be valid now, so it is safe to look inside it. */
+	sb = vol->sb;
+	block_size = sb->s_blocksize;
+	block_size_bits = sb->s_blocksize_bits;
+	/* Only the initialized part of the list is read from disk. */
+	al_end = al + initialized_size;
+	down_read(&run_list->lock);
+	rl = run_list->rl;
+	if (!rl) {
+		ntfs_error(sb, "Cannot read attribute list since runlist is "
+				"missing.");
+		goto err_out;
+	}
+	/* Read all clusters specified by the run list one run at a time. */
+	while (rl->length) {
+		lcn = vcn_to_lcn(rl, rl->vcn);
+		ntfs_debug("Reading vcn = 0x%Lx, lcn = 0x%Lx.",
+				(long long)rl->vcn, (long long)lcn);
+		/* The attribute list cannot be sparse. */
+		if (lcn < 0) {
+			ntfs_error(sb, "vcn_to_lcn() failed. Cannot read "
+					"attribute list.");
+			goto err_out;
+		}
+		/* Convert the cluster number into a device block number. */
+		block = lcn << vol->cluster_size_bits >> block_size_bits;
+		/* Read the run from device in chunks of block_size bytes. */
+		max_block = block + (rl->length << vol->cluster_size_bits >>
+				block_size_bits);
+		ntfs_debug("max_block = 0x%lx.", max_block);
+		do {
+			ntfs_debug("Reading block = 0x%lx.", block);
+			bh = sb_bread(sb, block);
+			if (!bh) {
+				ntfs_error(sb, "sb_bread() failed. Cannot "
+						"read attribute list.");
+				goto err_out;
+			}
+			/* The last (possibly partial) block is copied below. */
+			if (al + block_size >= al_end)
+				goto do_final;
+			memcpy(al, bh->b_data, block_size);
+			brelse(bh);
+			al += block_size;
+		} while (++block < max_block);
+		rl++;
+	}
+	if (initialized_size < size) {
+initialize:
+		/* Zero the part of the buffer beyond the initialized size. */
+		memset(al_start + initialized_size, 0, size - initialized_size);
+	}
+done:
+	up_read(&run_list->lock);
+	return err;
+do_final:
+	/* @bh still holds the block that reaches or crosses @al_end. */
+	if (al < al_end) {
+		/*
+		 * Partial block.
+		 *
+		 * Note: The attribute list can be smaller than its allocation
+		 * by multiple clusters.  This has been encountered by at least
+		 * two people running Windows XP, thus we cannot do any
+		 * truncation sanity checking here. (AIA)
+		 */
+		memcpy(al, bh->b_data, al_end - al);
+		brelse(bh);
+		if (initialized_size < size)
+			goto initialize;
+		goto done;
+	}
+	brelse(bh);
+	/* Real overflow! */
+	ntfs_error(sb, "Attribute list buffer overflow. Read attribute list "
+			"is truncated.");
+err_out:
+	err = -EIO;
+	goto done;
+}
+
+/**
+ * find_external_attr - find an attribute in the attribute list of an ntfs inode
+ * @type:	attribute type to find
+ * @name:	attribute name to find (optional, NULL means the unnamed attribute)
+ * @name_len:	attribute name length (only needed if @name present)
+ * @ic:		IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
+ * @lowest_vcn:	lowest vcn to find (optional, non-resident attributes only)
+ * @val:	attribute value to find (optional, resident attributes only)
+ * @val_len:	attribute value length
+ * @ctx:	search context with mft record and attribute to search from
+ *
+ * You shouldn't need to call this function directly. Use lookup_attr() instead.
+ *
+ * Find an attribute by searching the attribute list for the corresponding
+ * attribute list entry. Having found the entry, map the mft record for read
+ * if the attribute is in a different mft record/inode, find_attr the attribute
+ * in there and return it.
+ *
+ * On first search @ctx->ntfs_ino must be the base mft record and @ctx must
+ * have been obtained from a call to get_attr_search_ctx(). On subsequent calls
+ * @ctx->ntfs_ino can be any extent inode, too (@ctx->base_ntfs_ino is then the
+ * base inode).
+ *
+ * After finishing with the attribute/mft record you need to call
+ * put_attr_search_ctx() to cleanup the search context (unmapping any
+ * mapped inodes, etc).
+ *
+ * Return TRUE if the search was successful and FALSE if not. When TRUE,
+ * @ctx->attr is the found attribute and it is in mft record @ctx->mrec.
When
+ * FALSE, @ctx->attr is the attribute which collates just after the attribute
+ * being searched for in the base ntfs inode, i.e. if one wants to add the
+ * attribute to the mft record this is the correct place to insert it into
+ * and if there is not enough space, the attribute should be placed in an
+ * extent mft record.
+ */
+static BOOL find_external_attr(const ATTR_TYPES type, const uchar_t *name,
+		const u32 name_len, const IGNORE_CASE_BOOL ic,
+		const VCN lowest_vcn, const u8 *val, const u32 val_len,
+		attr_search_context *ctx)
+{
+	ntfs_inode *base_ni, *ni;
+	ntfs_volume *vol;
+	ATTR_LIST_ENTRY *al_entry, *next_al_entry;
+	u8 *al_start, *al_end;
+	ATTR_RECORD *a;
+	uchar_t *al_name;
+	u32 al_name_len;
+
+	ni = ctx->ntfs_ino;
+	base_ni = ctx->base_ntfs_ino;
+	ntfs_debug("Entering for inode 0x%lx, type 0x%x.", ni->mft_no, type);
+	if (!base_ni) {
+		/* First call happens with the base mft record. */
+		base_ni = ctx->base_ntfs_ino = ctx->ntfs_ino;
+		ctx->base_mrec = ctx->mrec;
+	}
+	/* Remember the position in the base record for the error path. */
+	if (ni == base_ni)
+		ctx->base_attr = ctx->attr;
+	vol = base_ni->vol;
+	al_start = base_ni->attr_list;
+	al_end = al_start + base_ni->attr_list_size;
+	if (!ctx->al_entry)
+		ctx->al_entry = (ATTR_LIST_ENTRY*)al_start;
+	/*
+	 * Iterate over entries in attribute list starting at @ctx->al_entry if
+	 * @ctx->is_first is TRUE, or at the entry following that if
+	 * @ctx->is_first is FALSE.
+	 */
+	if (ctx->is_first) {
+		al_entry = ctx->al_entry;
+		ctx->is_first = FALSE;
+	} else
+		al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
+				le16_to_cpu(ctx->al_entry->length));
+	for (;; al_entry = next_al_entry) {
+		/* Out of bounds check. */
+		if ((u8*)al_entry < base_ni->attr_list ||
+				(u8*)al_entry > al_end)
+			break;	/* Inode is corrupt. */
+		ctx->al_entry = al_entry;
+		/* Catch the end of the attribute list. */
+		if ((u8*)al_entry == al_end)
+			goto not_found;
+		/*
+		 * Make sure the fixed 6 byte head of the entry lies inside the
+		 * list before its length field is read, then reject empty and
+		 * overlong entries.
+		 */
+		if ((u8*)al_entry + 6 > al_end || !al_entry->length ||
+				(u8*)al_entry +
+				le16_to_cpu(al_entry->length) > al_end)
+			break;
+		next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
+				le16_to_cpu(al_entry->length));
+		if (le32_to_cpu(al_entry->type) > le32_to_cpu(type))
+			goto not_found;
+		if (type != al_entry->type)
+			continue;
+		/*
+		 * If @name is present, compare the two names. If @name is
+		 * missing, assume we want an unnamed attribute.
+		 */
+		al_name_len = al_entry->name_length;
+		al_name = (uchar_t*)((u8*)al_entry + al_entry->name_offset);
+		if (!name) {
+			if (al_name_len)
+				goto not_found;
+		} else if (!ntfs_are_names_equal(al_name, al_name_len, name,
+				name_len, ic, vol->upcase, vol->upcase_len)) {
+			register int rc;
+
+			rc = ntfs_collate_names(name, name_len, al_name,
+					al_name_len, 1, IGNORE_CASE,
+					vol->upcase, vol->upcase_len);
+			/*
+			 * If @name collates before al_name, there is no
+			 * matching attribute.
+			 */
+			if (rc == -1)
+				goto not_found;
+			/* If the strings are not equal, continue search. */
+			if (rc)
+				continue;
+			/*
+			 * FIXME: Reverse engineering showed 0, IGNORE_CASE but
+			 * that is inconsistent with find_attr(). The subsequent
+			 * rc checks were also different. Perhaps I made a
+			 * mistake in one of the two. Need to recheck which is
+			 * correct or at least see what is going on... (AIA)
+			 */
+			rc = ntfs_collate_names(name, name_len, al_name,
+					al_name_len, 1, CASE_SENSITIVE,
+					vol->upcase, vol->upcase_len);
+			if (rc == -1)
+				goto not_found;
+			if (rc)
+				continue;
+		}
+		/*
+		 * The names match or @name not present and attribute is
+		 * unnamed. Now check @lowest_vcn. Continue search if the
+		 * next attribute list entry still fits @lowest_vcn. Otherwise
+		 * we have reached the right one or the search has failed.
+		 *
+		 * Note @lowest_vcn is a function argument in cpu byte order,
+		 * only the on-disk value needs converting.
+		 */
+		if (lowest_vcn && (u8*)next_al_entry >= al_start &&
+				(u8*)next_al_entry + 6 < al_end &&
+				(u8*)next_al_entry + le16_to_cpu(
+					next_al_entry->length) <= al_end &&
+				sle64_to_cpu(next_al_entry->lowest_vcn) <=
+					lowest_vcn &&
+				next_al_entry->type == al_entry->type &&
+				next_al_entry->name_length == al_name_len &&
+				ntfs_are_names_equal((uchar_t*)((u8*)
+					next_al_entry +
+					next_al_entry->name_offset),
+					next_al_entry->name_length,
+					al_name, al_name_len, CASE_SENSITIVE,
+					vol->upcase, vol->upcase_len))
+			continue;
+		if (MREF_LE(al_entry->mft_reference) == ni->mft_no) {
+			if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no) {
+				ntfs_error(vol->sb, "Found stale mft "
+						"reference in attribute list!");
+				break;
+			}
+		} else { /* Mft references do not match. */
+			/* If there is a mapped record unmap it first. */
+			if (ni != base_ni)
+				unmap_extent_mft_record(ni);
+			/* Do we want the base record back? */
+			if (MREF_LE(al_entry->mft_reference) ==
+					base_ni->mft_no) {
+				ni = ctx->ntfs_ino = base_ni;
+				ctx->mrec = ctx->base_mrec;
+			} else {
+				/* We want an extent record. */
+				ctx->mrec = map_extent_mft_record(base_ni,
+						al_entry->mft_reference, &ni);
+				if (IS_ERR(ctx->mrec)) {
+					ntfs_error(vol->sb, "Failed to map mft "
+							"record, error code "
+							"%ld.",
+							-PTR_ERR(ctx->mrec));
+					/*
+					 * No extent record is mapped now.
+					 * Clearing @ni makes the error path
+					 * below reset @ctx to the base record
+					 * without unmapping anything.
+					 */
+					ni = NULL;
+					break;
+				}
+				ctx->ntfs_ino = ni;
+			}
+			ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
+					le16_to_cpu(ctx->mrec->attrs_offset));
+		}
+		/*
+		 * ctx->ntfs_ino, ctx->mrec, and ctx->attr now point to the
+		 * mft record containing the attribute represented by the
+		 * current al_entry.
+		 */
+		/*
+		 * We could call into find_attr() to find the right attribute
+		 * in this mft record but this would be less efficient and not
+		 * quite accurate as find_attr() ignores the attribute instance
+		 * numbers for example which become important when one plays
+		 * with attribute lists. Also, because a proper match has been
+		 * found in the attribute list entry above, the comparison can
+		 * now be optimized. So it is worth re-implementing a
+		 * simplified find_attr() here.
+		 */
+		a = ctx->attr;
+		/*
+		 * Use a manual loop so we can still use break and continue
+		 * with the same meanings as above.
+		 */
+do_next_attr_loop:
+		if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
+				le32_to_cpu(ctx->mrec->bytes_allocated))
+			break;
+		if (a->type == AT_END)
+			continue;
+		if (!a->length)
+			break;
+		/* The instance number identifies the attribute in the record. */
+		if (al_entry->instance != a->instance)
+			goto do_next_attr;
+		if (al_entry->type != a->type)
+			continue;
+		if (name) {
+			if (a->name_length != al_name_len)
+				continue;
+			if (!ntfs_are_names_equal((uchar_t*)((u8*)a +
+					le16_to_cpu(a->name_offset)),
+					a->name_length, al_name, al_name_len,
+					CASE_SENSITIVE, vol->upcase,
+					vol->upcase_len))
+				continue;
+		}
+		ctx->attr = a;
+		/*
+		 * If no @val specified or @val specified and it matches, we
+		 * have found it!
+		 */
+		if (!val || (!a->non_resident && le32_to_cpu(
+				a->data.resident.value_length) == val_len &&
+				!memcmp((u8*)a +
+				le16_to_cpu(a->data.resident.value_offset),
+				val, val_len))) {
+			ntfs_debug("Done, found.");
+			return TRUE;
+		}
+do_next_attr:
+		/* Proceed to the next attribute in the current mft record. */
+		a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
+		goto do_next_attr_loop;
+	}
+	ntfs_error(base_ni->vol->sb, "Inode contains corrupt attribute list "
+			"attribute.\n");
+	/* Leave @ctx pointing at the base mft record again. */
+	if (ni != base_ni) {
+		/* @ni is NULL if mapping the extent record failed above. */
+		if (ni)
+			unmap_extent_mft_record(ni);
+		ctx->ntfs_ino = base_ni;
+		ctx->mrec = ctx->base_mrec;
+		ctx->attr = ctx->base_attr;
+	}
+	/*
+	 * FIXME: We absolutely have to return ERROR status instead of just
+	 * false or we will blow up or even worse cause corruption when we add
+	 * write support and we reach this code path!
+	 */
+	printk(KERN_CRIT "NTFS: FIXME: Hit unfinished error code path!!!\n");
+	return FALSE;
+not_found:
+	/*
+	 * Seek to the end of the base mft record, i.e. when we return false,
+	 * ctx->mrec and ctx->attr indicate where the attribute should be
+	 * inserted into the attribute record.
+	 * And of course ctx->al_entry points to the end of the attribute
+	 * list inside NTFS_I(ctx->base_vfs_ino)->attr_list.
+	 *
+	 * FIXME: Do we really want to do this here? Think about it... (AIA)
+	 */
+	reinit_attr_search_ctx(ctx);
+	find_attr(type, name, name_len, ic, val, val_len, ctx);
+	ntfs_debug("Done, not found.");
+	return FALSE;
+}
+
+/**
+ * lookup_attr - find an attribute in an ntfs inode
+ * @type:	attribute type to find
+ * @name:	attribute name to find (optional, NULL means the unnamed attribute)
+ * @name_len:	attribute name length (only needed if @name present)
+ * @ic:		IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
+ * @lowest_vcn:	lowest vcn to find (optional, non-resident attributes only)
+ * @val:	attribute value to find (optional, resident attributes only)
+ * @val_len:	attribute value length
+ * @ctx:	search context with mft record and attribute to search from
+ *
+ * Find an attribute in an ntfs inode. On first search @ctx->ntfs_ino must
+ * be the base mft record and @ctx must have been obtained from a call to
+ * get_attr_search_ctx().
+ *
+ * This function transparently handles attribute lists and @ctx is used to
+ * continue searches where they were left off at.
+ *
+ * After finishing with the attribute/mft record you need to call
+ * put_attr_search_ctx() to cleanup the search context (unmapping any
+ * mapped inodes, etc).
+ *
+ * Return TRUE if the search was successful and FALSE if not. When TRUE,
+ * @ctx->attr is the found attribute and it is in mft record @ctx->mrec. When
+ * FALSE, @ctx->attr is the attribute which collates just after the attribute
+ * being searched for, i.e. if one wants to add the attribute to the mft
+ * record this is the correct place to insert it into.
+ */
+BOOL lookup_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len,
+		const IGNORE_CASE_BOOL ic, const VCN lowest_vcn, const u8 *val,
+		const u32 val_len, attr_search_context *ctx)
+{
+	ntfs_inode *base_ni;
+
+	ntfs_debug("Entering.");
+	/* Before the first attribute list search only @ctx->ntfs_ino is set. */
+	base_ni = ctx->base_ntfs_ino ? ctx->base_ntfs_ino : ctx->ntfs_ino;
+	/* Sanity check, just for debugging really. */
+	BUG_ON(!base_ni);
+	/* With an attribute list the search has to go through the list. */
+	if (NInoAttrList(base_ni))
+		return find_external_attr(type, name, name_len, ic, lowest_vcn,
+				val, val_len, ctx);
+	return find_attr(type, name, name_len, ic, val, val_len, ctx);
+}
+
+/**
+ * init_attr_search_ctx - initialize an attribute search context
+ * @ctx:	attribute search context to initialize
+ * @ni:		ntfs inode with which to initialize the search context
+ * @mrec:	mft record with which to initialize the search context
+ *
+ * Set up @ctx so that a search starts with the first attribute of @mrec, which
+ * belongs to @ni, and forget all attribute list related state.
+ */
+static inline void init_attr_search_ctx(attr_search_context *ctx,
+		ntfs_inode *ni, MFT_RECORD *mrec)
+{
+	/* No attribute list search has happened yet. */
+	ctx->base_attr = NULL;
+	ctx->base_mrec = NULL;
+	ctx->base_ntfs_ino = NULL;
+	ctx->al_entry = NULL;
+	ctx->ntfs_ino = ni;
+	ctx->mrec = mrec;
+	ctx->is_first = TRUE;
+	/* Sanity checks are performed elsewhere. */
+	ctx->attr = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset));
+}
+
+/**
+ * reinit_attr_search_ctx - reinitialize an attribute search context
+ * @ctx:	attribute search context to reinitialize
+ *
+ * Rewind @ctx to the first attribute of the base mft record.  If an extent mft
+ * record is currently mapped by the context it is unmapped first.
+ *
+ * Use this before starting a search for a new attribute with an existing
+ * search context.
+ */
+void reinit_attr_search_ctx(attr_search_context *ctx)
+{
+	if (unlikely(ctx->base_ntfs_ino)) {
+		/* Attribute list. */
+		if (ctx->ntfs_ino != ctx->base_ntfs_ino)
+			unmap_extent_mft_record(ctx->ntfs_ino);
+		init_attr_search_ctx(ctx, ctx->base_ntfs_ino, ctx->base_mrec);
+		return;
+	}
+	/* No attribute list. */
+	ctx->is_first = TRUE;
+	/* Sanity checks are performed elsewhere. */
+	ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
+			le16_to_cpu(ctx->mrec->attrs_offset));
+}
+
+/**
+ * get_attr_search_ctx - allocate and initialize a new attribute search context
+ * @ni:		ntfs inode with which to initialize the search context
+ * @mrec:	mft record with which to initialize the search context
+ *
+ * Allocate a search context from ntfs_attr_ctx_cache and initialize it with
+ * @ni and @mrec.  Return the new context or NULL if the allocation failed.
+ */
+attr_search_context *get_attr_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec)
+{
+	attr_search_context *ctx;
+
+	ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS);
+	if (!ctx)
+		return NULL;
+	init_attr_search_ctx(ctx, ni, mrec);
+	return ctx;
+}
+
+/**
+ * put_attr_search_ctx - release an attribute search context
+ * @ctx:	attribute search context to free
+ *
+ * Unmap the extent mft record held by @ctx, if there is one, and give @ctx
+ * back to ntfs_attr_ctx_cache.
+ */
+void put_attr_search_ctx(attr_search_context *ctx)
+{
+	if (ctx->base_ntfs_ino) {
+		/* Only extent records are mapped by the search functions. */
+		if (ctx->ntfs_ino != ctx->base_ntfs_ino)
+			unmap_extent_mft_record(ctx->ntfs_ino);
+	}
+	kmem_cache_free(ntfs_attr_ctx_cache, ctx);
+}
+
+diff -urN linux-2.4.24-vanilla/fs/ntfs/attrib.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/attrib.h
--- linux-2.4.24-vanilla/fs/ntfs/attrib.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/attrib.h 2004-01-21 14:31:43.000000000 +0000
@@ -0,0 +1,106 @@
+/*
+ * attrib.h - Defines for attribute handling in NTFS Linux kernel driver.
+ *	      Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001-2003 Anton Altaparmakov
+ * Copyright (c) 2002 Richard Russon
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LINUX_NTFS_ATTRIB_H
+#define _LINUX_NTFS_ATTRIB_H
+
+#include
+
+#include "endian.h"
+#include "types.h"
+#include "layout.h"
+
+/**
+ * init_run_list - initialize an empty run list
+ * @rl:	run list to initialize
+ *
+ * Clear the run list array pointer of @rl and initialize its lock.
+ */
+static inline void init_run_list(run_list *rl)
+{
+	rl->rl = NULL;
+	init_rwsem(&rl->lock);
+}
+
+/*
+ * Negative values used in place of a real logical cluster number.
+ * NOTE(review): presumably these are what vcn_to_lcn() returns when a vcn
+ * has no cluster on disk - confirm against its implementation.
+ */
+typedef enum {
+	LCN_HOLE		= -1,	/* Keep this as highest value or die! */
+	LCN_RL_NOT_MAPPED	= -2,
+	LCN_ENOENT		= -3,
+	LCN_EINVAL		= -4,
+} LCN_SPECIAL_VALUES;
+
+/**
+ * attr_search_context - used in attribute search functions
+ * @mrec:	buffer containing mft record to search
+ * @attr:	attribute record in @mrec where to begin/continue search
+ * @is_first:	if true lookup_attr() begins search with @attr, else after @attr
+ * @ntfs_ino:	ntfs inode to which @mrec belongs
+ * @al_entry:	current attribute list entry (attribute list searches only)
+ * @base_ntfs_ino: base ntfs inode, set once an attribute list search started
+ * @base_mrec:	mft record of @base_ntfs_ino
+ * @base_attr:	last search position inside @base_mrec
+ *
+ * Structure must be initialized to zero before the first call to one of the
+ * attribute search functions. Initialize @mrec to point to the mft record to
+ * search, and @attr to point to the first attribute within @mrec (not necessary
+ * if calling the _first() functions), and set @is_first to TRUE (not necessary
+ * if calling the _first() functions).
+ *
+ * If @is_first is TRUE, the search begins with @attr. If @is_first is FALSE,
+ * the search begins after @attr. This is so that, after the first call to one
+ * of the search attribute functions, we can call the function again, without
+ * any modification of the search context, to automagically get the next
+ * matching attribute.
+ */
+typedef struct {
+	MFT_RECORD *mrec;
+	ATTR_RECORD *attr;
+	BOOL is_first;
+	ntfs_inode *ntfs_ino;
+	ATTR_LIST_ENTRY *al_entry;
+	ntfs_inode *base_ntfs_ino;
+	MFT_RECORD *base_mrec;
+	ATTR_RECORD *base_attr;
+} attr_search_context;
+
+extern run_list_element *decompress_mapping_pairs(const ntfs_volume *vol,
+		const ATTR_RECORD *attr, run_list_element *old_rl);
+
+extern int map_run_list(ntfs_inode *ni, VCN vcn);
+
+extern LCN vcn_to_lcn(const run_list_element *rl, const VCN vcn);
+
+extern BOOL find_attr(const ATTR_TYPES type, const uchar_t *name,
+		const u32 name_len, const IGNORE_CASE_BOOL ic, const u8 *val,
+		const u32 val_len, attr_search_context *ctx);
+
+BOOL lookup_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len,
+		const IGNORE_CASE_BOOL ic, const VCN lowest_vcn, const u8 *val,
+		const u32 val_len, attr_search_context *ctx);
+
+extern int load_attribute_list(ntfs_volume *vol, run_list *rl, u8 *al_start,
+		const s64 size, const s64 initialized_size);
+
+/**
+ * attribute_value_length - get the length of the value of an attribute
+ * @a:	attribute record to examine
+ *
+ * Return the value length of a resident attribute and the data size of a
+ * non-resident attribute, converted to cpu byte order.
+ */
+static inline s64 attribute_value_length(const ATTR_RECORD *a)
+{
+	if (!a->non_resident)
+		return (s64)le32_to_cpu(a->data.resident.value_length);
+	return sle64_to_cpu(a->data.non_resident.data_size);
+}
+
+extern void reinit_attr_search_ctx(attr_search_context *ctx);
+extern attr_search_context *get_attr_search_ctx(ntfs_inode *ni,
+		MFT_RECORD *mrec);
+extern void put_attr_search_ctx(attr_search_context *ctx);
+
+#endif /* _LINUX_NTFS_ATTRIB_H */
+
diff -urN linux-2.4.24-vanilla/fs/ntfs/compress.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/compress.c
--- linux-2.4.24-vanilla/fs/ntfs/compress.c 1970-01-01 01:00:00.000000000 +0100
+++
linux-2.4.24-ntfs-2.1.6a/fs/ntfs/compress.c 2004-01-21 14:28:25.000000000 +0000
@@ -0,0 +1,946 @@
+/**
+ * compress.c - NTFS kernel compressed attributes handling.
+ *		Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001-2003 Anton Altaparmakov
+ * Copyright (c) 2002 Richard Russon
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include
+#include
+
+#include "ntfs.h"
+
+/**
+ * ntfs_compression_constants - enum of constants used in the compression code
+ */
+typedef enum {
+	/* Token types and access mask. */
+	NTFS_SYMBOL_TOKEN	=	0,
+	NTFS_PHRASE_TOKEN	=	1,
+	NTFS_TOKEN_MASK		=	1,
+
+	/* Compression sub-block constants. */
+	NTFS_SB_SIZE_MASK	=	0x0fff,
+	NTFS_SB_SIZE		=	0x1000,
+	NTFS_SB_IS_COMPRESSED	=	0x8000,
+
+	/*
+	 * The maximum compression block size is by definition 16 * the cluster
+	 * size, with the maximum supported cluster size being 4kiB. Thus the
+	 * maximum compression buffer size is 64kiB, so we use this when
+	 * initializing the compression buffer.
+	 */
+	NTFS_MAX_CB_SIZE	= 64 * 1024,
+} ntfs_compression_constants;
+
+/**
+ * ntfs_compression_buffer - one buffer for the decompression engine
+ *
+ * Allocated by allocate_compression_buffers() with NTFS_MAX_CB_SIZE bytes and
+ * released by free_compression_buffers().
+ */
+static u8 *ntfs_compression_buffer = NULL;
+
+/**
+ * ntfs_cb_lock - spinlock which protects ntfs_compression_buffer
+ */
+static spinlock_t ntfs_cb_lock = SPIN_LOCK_UNLOCKED;
+
+/**
+ * allocate_compression_buffers - allocate the decompression buffers
+ *
+ * Caller has to hold the ntfs_lock semaphore.
+ *
+ * It is a bug to call this while the buffer is already allocated.
+ *
+ * Return 0 on success or -ENOMEM if the allocations failed.
+ */
+int allocate_compression_buffers(void)
+{
+	BUG_ON(ntfs_compression_buffer);
+
+	ntfs_compression_buffer = vmalloc(NTFS_MAX_CB_SIZE);
+	if (!ntfs_compression_buffer)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * free_compression_buffers - free the decompression buffers
+ *
+ * Caller has to hold the ntfs_lock semaphore.
+ *
+ * It is a bug to call this while the buffer is not allocated.
+ */
+void free_compression_buffers(void)
+{
+	BUG_ON(!ntfs_compression_buffer);
+	vfree(ntfs_compression_buffer);
+	ntfs_compression_buffer = NULL;
+}
+
+/**
+ * zero_partial_compressed_page - zero out of bounds compressed page region
+ * @ni:		ntfs inode the page belongs to (supplies the initialized size)
+ * @page:	page of which the region beyond the initialized size is zeroed
+ *
+ * If @page starts at or beyond the initialized size of @ni the whole page is
+ * cleared, otherwise only the part of the page from the initialized size to
+ * the end of the page.
+ */
+static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page)
+{
+	u8 *kp = page_address(page);
+	unsigned int kp_ofs;
+
+	ntfs_debug("Zeroing page region outside initialized size.");
+	if (((s64)page->index << PAGE_CACHE_SHIFT) >= ni->initialized_size) {
+		/*
+		 * FIXME: Using clear_page() will become wrong when we get
+		 * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem.
+		 */
+		clear_page(kp);
+		return;
+	}
+	/* Offset of the initialized size within this page. */
+	kp_ofs = ni->initialized_size & ~PAGE_CACHE_MASK;
+	memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs);
+	return;
+}
+
+/**
+ * handle_bounds_compressed_page - test for&handle out of bounds compressed page
+ * @ni:		ntfs inode the page belongs to
+ * @page:	page to test and, if necessary, partially zero
+ *
+ * Call zero_partial_compressed_page() if @page contains or lies beyond the
+ * initialized size of @ni and the initialized size is smaller than the file
+ * size.
+ */
+static inline void handle_bounds_compressed_page(ntfs_inode *ni,
+		struct page *page)
+{
+	if ((page->index >= (ni->initialized_size >> PAGE_CACHE_SHIFT)) &&
+			(ni->initialized_size < VFS_I(ni)->i_size))
+		zero_partial_compressed_page(ni, page);
+	return;
+}
+
+/**
+ * ntfs_decompress - decompress a compression block into an array of pages
+ * @dest_pages:		destination array of pages
+ * @dest_index:		current index into @dest_pages (IN/OUT)
+ * @dest_ofs:		current offset within @dest_pages[@dest_index] (IN/OUT)
+ * @dest_max_index:	maximum index into @dest_pages (IN)
+ * @dest_max_ofs:	maximum offset within @dest_pages[@dest_max_index] (IN)
+ * @xpage:		the target page (-1 if none) (IN)
+ * @xpage_done:		set to 1 if xpage was completed successfully (IN/OUT)
+ * @cb_start:		compression block to decompress (IN)
+ * @cb_size:		size of compression block @cb_start in bytes (IN)
+ *
+ * The caller must hold the ntfs_cb_lock spinlock. ntfs_decompress() releases
+ * it when the critical section is finished.
+ *
+ * This decompresses the compression block @cb_start into the array of
+ * destination pages @dest_pages starting at index @dest_index into @dest_pages
+ * and at offset @dest_ofs into the page @dest_pages[@dest_index].
+ *
+ * When the page @dest_pages[@xpage] is completed, @xpage_done is set to 1.
+ * If xpage is -1 or @xpage has not been completed, @xpage_done is not modified.
+ *
+ * @cb_start is a pointer to the compression block which needs decompressing
+ * and @cb_size is the size of @cb_start in bytes (8-64kiB).
+ *
+ * Return 0 if success or -EOVERFLOW on error in the compressed stream.
+ * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was
+ * completed during the decompression of the compression block (@cb_start).
+ *
+ * Warning: This function *REQUIRES* PAGE_CACHE_SIZE >= 4096 or it will blow up
+ * unpredictably! You have been warned!
+ *
+ * Note to hackers: This function may not sleep until it has finished accessing
+ * the compression block @cb_start as it is a per-CPU buffer.
+ */
+static int ntfs_decompress(struct page *dest_pages[], int *dest_index,
+		int *dest_ofs, const int dest_max_index, const int dest_max_ofs,
+		const int xpage, char *xpage_done, u8 *const cb_start,
+		const u32 cb_size)
+{
+	/*
+	 * Pointers into the compressed data, i.e. the compression block (cb),
+	 * and the therein contained sub-blocks (sb).
+	 */
+	u8 *cb_end = cb_start + cb_size; /* End of cb. */
+	u8 *cb = cb_start;	/* Current position in cb. */
+	u8 *cb_sb_start = cb;	/* Beginning of the current sb in the cb. */
+	u8 *cb_sb_end;		/* End of current sb / beginning of next sb. */
+
+	/* Variables for uncompressed data / destination. */
+	struct page *dp;	/* Current destination page being worked on. */
+	u8 *dp_addr;		/* Current pointer into dp. */
+	u8 *dp_sb_start;	/* Start of current sub-block in dp. */
+	u8 *dp_sb_end;		/* End of current sb in dp (dp_sb_start +
+				   NTFS_SB_SIZE). */
+	u16 do_sb_start;	/* @dest_ofs when starting this sub-block. */
+	u16 do_sb_end;		/* @dest_ofs of end of this sb (do_sb_start +
+				   NTFS_SB_SIZE). */
+
+	/* Variables for tag and token parsing. */
+	u8 tag;			/* Current tag. */
+	int token;		/* Loop counter for the eight tokens in tag. */
+
+	/* Need this because we can't sleep, so need two stages. */
+	int completed_pages[dest_max_index - *dest_index + 1];
+	int nr_completed_pages = 0;
+
+	/* Default error code. */
+	int err = -EOVERFLOW;
+
+	ntfs_debug("Entering, cb_size = 0x%x.", cb_size);
+do_next_sb:
+	/* The cast makes the argument match %x on 64-bit architectures, too. */
+	ntfs_debug("Beginning sub-block at offset = 0x%x in the cb.",
+			(unsigned int)(cb - cb_start));
+
+	/* Have we reached the end of the compression block? */
+	if (cb == cb_end || !le16_to_cpup((u16*)cb)) {
+		int i;
+
+		ntfs_debug("Completed. Returning success (0).");
+		err = 0;
+return_error:
+		/* We can sleep from now on, so we drop lock. */
+		spin_unlock(&ntfs_cb_lock);
+		/* Second stage: finalize completed pages. */
+		if (nr_completed_pages > 0) {
+			struct page *page = dest_pages[completed_pages[0]];
+			ntfs_inode *ni = NTFS_I(page->mapping->host);
+
+			for (i = 0; i < nr_completed_pages; i++) {
+				int di = completed_pages[i];
+
+				dp = dest_pages[di];
+				/*
+				 * If we are outside the initialized size, zero
+				 * the out of bounds page range.
+				 */
+				handle_bounds_compressed_page(ni, dp);
+				flush_dcache_page(dp);
+				kunmap(dp);
+				SetPageUptodate(dp);
+				unlock_page(dp);
+				/* The target page reference is kept. */
+				if (di == xpage)
+					*xpage_done = 1;
+				else
+					page_cache_release(dp);
+				dest_pages[di] = NULL;
+			}
+		}
+		return err;
+	}
+
+	/* Setup offsets for the current sub-block destination. */
+	do_sb_start = *dest_ofs;
+	do_sb_end = do_sb_start + NTFS_SB_SIZE;
+
+	/* Check that we are still within allowed boundaries. */
+	if (*dest_index == dest_max_index && do_sb_end > dest_max_ofs)
+		goto return_overflow;
+
+	/* Does the minimum size of a compressed sb overflow valid range? */
+	if (cb + 6 > cb_end)
+		goto return_overflow;
+
+	/* Setup the current sub-block source pointers and validate range. */
+	cb_sb_start = cb;
+	cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK)
+			+ 3;
+	if (cb_sb_end > cb_end)
+		goto return_overflow;
+
+	/* Get the current destination page. */
+	dp = dest_pages[*dest_index];
+	if (!dp) {
+		/* No page present. Skip decompression of this sub-block. */
+		cb = cb_sb_end;
+
+		/* Advance destination position to next sub-block. */
+		*dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_CACHE_MASK;
+		if (!*dest_ofs && (++*dest_index > dest_max_index))
+			goto return_overflow;
+		goto do_next_sb;
+	}
+
+	/* We have a valid destination page. Setup the destination pointers. */
+	dp_addr = (u8*)page_address(dp) + do_sb_start;
+
+	/* Now, we are ready to process the current sub-block (sb). */
+	if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) {
+		ntfs_debug("Found uncompressed sub-block.");
+		/* This sb is not compressed, just copy it into destination. */
+
+		/* Advance source position to first data byte. */
+		cb += 2;
+
+		/* An uncompressed sb must be full size. */
+		if (cb_sb_end - cb != NTFS_SB_SIZE)
+			goto return_overflow;
+
+		/* Copy the block and advance the source position. */
+		memcpy(dp_addr, cb, NTFS_SB_SIZE);
+		cb += NTFS_SB_SIZE;
+
+		/* Advance destination position to next sub-block. */
+		*dest_ofs += NTFS_SB_SIZE;
+		if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) {
+finalize_page:
+			/*
+			 * First stage: add current page index to array of
+			 * completed pages.
+			 */
+			completed_pages[nr_completed_pages++] = *dest_index;
+			if (++*dest_index > dest_max_index)
+				goto return_overflow;
+		}
+		goto do_next_sb;
+	}
+	ntfs_debug("Found compressed sub-block.");
+	/* This sb is compressed, decompress it into destination. */
+
+	/* Setup destination pointers. */
+	dp_sb_start = dp_addr;
+	dp_sb_end = dp_sb_start + NTFS_SB_SIZE;
+
+	/* Forward to the first tag in the sub-block. */
+	cb += 2;
+do_next_tag:
+	if (cb == cb_sb_end) {
+		/* Check if the decompressed sub-block was not full-length. */
+		if (dp_addr < dp_sb_end) {
+			int nr_bytes = do_sb_end - *dest_ofs;
+
+			ntfs_debug("Filling incomplete sub-block with "
+					"zeroes.");
+			/* Zero remainder and update destination position. */
+			memset(dp_addr, 0, nr_bytes);
+			*dest_ofs += nr_bytes;
+		}
+		/* We have finished the current sub-block. */
+		if (!(*dest_ofs &= ~PAGE_CACHE_MASK))
+			goto finalize_page;
+		goto do_next_sb;
+	}
+
+	/* Check we are still in range. */
+	if (cb > cb_sb_end || dp_addr > dp_sb_end)
+		goto return_overflow;
+
+	/* Get the next tag and advance to first token. */
+	tag = *cb++;
+
+	/* Parse the eight tokens described by the tag. */
+	for (token = 0; token < 8; token++, tag >>= 1) {
+		u16 lg, pt, length, max_non_overlap;
+		register u16 i;
+		u8 *dp_back_addr;
+
+		/* Check if we are done / still in range. */
+		if (cb >= cb_sb_end || dp_addr > dp_sb_end)
+			break;
+
+		/* Determine token type and parse appropriately.*/
+		if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
+			/*
+			 * A symbol with the destination sub-block already
+			 * full can only come from a corrupt stream.  Bail out
+			 * instead of writing beyond the sub-block.
+			 */
+			if (dp_addr >= dp_sb_end)
+				goto return_overflow;
+
+			/*
+			 * We have a symbol token, copy the symbol across, and
+			 * advance the source and destination positions.
+			 */
+			*dp_addr++ = *cb++;
+			++*dest_ofs;
+
+			/* Continue with the next token. */
+			continue;
+		}
+
+		/*
+		 * We have a phrase token. Make sure it is not the first tag in
+		 * the sb as this is illegal and would confuse the code below.
+		 */
+		if (dp_addr == dp_sb_start)
+			goto return_overflow;
+
+		/* Both bytes of the phrase token must be inside the sb. */
+		if (cb + 2 > cb_sb_end)
+			goto return_overflow;
+
+		/*
+		 * Determine the number of bytes to go back (p) and the number
+		 * of bytes to copy (l). We use an optimized algorithm in which
+		 * we first calculate log2(current destination position in sb),
+		 * which allows determination of l and p in O(1) rather than
+		 * O(n). We just need an arch-optimized log2() function now.
+		 */
+		lg = 0;
+		for (i = *dest_ofs - do_sb_start - 1; i >= 0x10; i >>= 1)
+			lg++;
+
+		/* Get the phrase token into pt. */
+		pt = le16_to_cpup((u16*)cb);
+
+		/*
+		 * Calculate starting position of the byte sequence in
+		 * the destination using the fact that p = (pt >> (12 - lg)) + 1
+		 * and make sure we don't go too far back.
+		 */
+		dp_back_addr = dp_addr - (pt >> (12 - lg)) - 1;
+		if (dp_back_addr < dp_sb_start)
+			goto return_overflow;
+
+		/* Now calculate the length of the byte sequence. */
+		length = (pt & (0xfff >> lg)) + 3;
+
+		/* Advance destination position and verify it is in range. */
+		*dest_ofs += length;
+		if (*dest_ofs > do_sb_end)
+			goto return_overflow;
+
+		/* The number of non-overlapping bytes. */
+		max_non_overlap = dp_addr - dp_back_addr;
+
+		if (length <= max_non_overlap) {
+			/* The byte sequence doesn't overlap, just copy it. */
+			memcpy(dp_addr, dp_back_addr, length);
+
+			/* Advance destination pointer. */
+			dp_addr += length;
+		} else {
+			/*
+			 * The byte sequence does overlap, copy non-overlapping
+			 * part and then do a slow byte by byte copy for the
+			 * overlapping part. Also, advance the destination
+			 * pointer.
+			 */
+			memcpy(dp_addr, dp_back_addr, max_non_overlap);
+			dp_addr += max_non_overlap;
+			dp_back_addr += max_non_overlap;
+			length -= max_non_overlap;
+			while (length--)
+				*dp_addr++ = *dp_back_addr++;
+		}
+
+		/* Advance source position and continue with the next token. */
+		cb += 2;
+	}
+
+	/* No tokens left in the current tag. Continue with the next tag. */
+	goto do_next_tag;
+
+return_overflow:
+	ntfs_error(NULL, "Failed. Returning -EOVERFLOW.\n");
+	goto return_error;
+}
+
+/**
+ * ntfs_read_compressed_block - read a compressed block into the page cache
+ * @page:	locked page in the compression block(s) we need to read
+ *
+ * When we are called the page has already been verified to be locked and the
+ * attribute is known to be non-resident, not encrypted, but compressed.
+ *
+ * 1. Determine which compression block(s) @page is in.
+ * 2. Get hold of all pages corresponding to this/these compression block(s).
+ * 3. Read the (first) compression block.
+ * 4. Decompress it into the corresponding pages.
+ * 5. Throw the compressed data away and proceed to 3. for the next compression
+ *    block or return success if no more compression blocks left.
+ *
+ * Warning: We have to be careful what we do about existing pages. They might
+ * have been written to so that we would lose data if we were to just overwrite
+ * them with the out-of-date uncompressed data.
+ *
+ * FIXME: For PAGE_CACHE_SIZE > cb_size we are not doing the Right Thing(TM) at
+ * the end of the file I think.
We need to detect this case and zero the out + * of bounds remainder of the page in question and mark it as handled. At the + * moment we would just return -EIO on such a page. This bug will only become + * apparent if pages are above 8kiB and the NTFS volume only uses 512 byte + * clusters so is probably not going to be seen by anyone. Still this should + * be fixed. (AIA) + * + * FIXME: Again for PAGE_CACHE_SIZE > cb_size we are screwing up both in + * handling sparse and compressed cbs. (AIA) + * + * FIXME: At the moment we don't do any zeroing out in the case that + * initialized_size is less than data_size. This should be safe because of the + * nature of the compression algorithm used. Just in case we check and output + * an error message in read inode if the two sizes are not equal for a + * compressed file. (AIA) + */ +int ntfs_read_compressed_block(struct page *page) +{ + struct address_space *mapping = page->mapping; + ntfs_inode *ni = NTFS_I(mapping->host); + ntfs_volume *vol = ni->vol; + kdev_t dev = vol->sb->s_dev; + run_list_element *rl; + unsigned long block_size = vol->sb->s_blocksize; + unsigned char block_size_bits = vol->sb->s_blocksize_bits; + u8 *cb, *cb_pos, *cb_end; + struct buffer_head **bhs; + unsigned long offset, index = page->index; + u32 cb_size = ni->itype.compressed.block_size; + u64 cb_size_mask = cb_size - 1UL; + VCN vcn; + LCN lcn; + /* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */ + VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> + vol->cluster_size_bits; + /* + * The first vcn after the last wanted vcn (minimum alignment is again + * PAGE_CACHE_SIZE). + */ + VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) + & ~cb_size_mask) >> vol->cluster_size_bits; + /* Number of compression blocks (cbs) in the wanted vcn range. 
*/ + unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits + >> ni->itype.compressed.block_size_bits; + /* + * Number of pages required to store the uncompressed data from all + * compression blocks (cbs) overlapping @page. Due to alignment + * guarantees of start_vcn and end_vcn, no need to round up here. + */ + unsigned int nr_pages = (end_vcn - start_vcn) << + vol->cluster_size_bits >> PAGE_CACHE_SHIFT; + unsigned int xpage, max_page, cur_page, cur_ofs, i; + unsigned int cb_clusters, cb_max_ofs; + int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0; + struct page **pages; + unsigned char xpage_done = 0; + + ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = " + "%i.", index, cb_size, nr_pages); + /* + * Bad things happen if we get here for anything that is not an + * unnamed $DATA attribute. + */ + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + + pages = kmalloc(nr_pages * sizeof(struct page *), GFP_NOFS); + + /* Allocate memory to store the buffer heads we need. */ + bhs_size = cb_size / block_size * sizeof(struct buffer_head *); + bhs = kmalloc(bhs_size, GFP_NOFS); + + if (unlikely(!pages || !bhs)) { + kfree(bhs); + kfree(pages); + SetPageError(page); + unlock_page(page); + ntfs_error(vol->sb, "Failed to allocate internal buffers."); + return -ENOMEM; + } + + /* + * We have already been given one page, this is the one we must do. + * Once again, the alignment guarantees keep it simple. + */ + offset = start_vcn << vol->cluster_size_bits >> PAGE_CACHE_SHIFT; + xpage = index - offset; + pages[xpage] = page; + /* + * The remaining pages need to be allocated and inserted into the page + * cache, alignment guarantees keep all the below much simpler. 
(-8 + */ + max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT) - offset; + if (nr_pages < max_page) + max_page = nr_pages; + for (i = 0; i < max_page; i++, offset++) { + if (i != xpage) + pages[i] = grab_cache_page_nowait(mapping, offset); + page = pages[i]; + if (page) { + /* + * We only (re)read the page if it isn't already read + * in and/or dirty or we would be losing data or at + * least wasting our time. + */ + if (!PageDirty(page) && (!PageUptodate(page) || + PageError(page))) { + ClearPageError(page); + kmap(page); + continue; + } + unlock_page(page); + page_cache_release(page); + pages[i] = NULL; + } + } + + /* + * We have the run list, and all the destination pages we need to fill. + * Now read the first compression block. + */ + cur_page = 0; + cur_ofs = 0; + cb_clusters = ni->itype.compressed.block_clusters; +do_next_cb: + nr_cbs--; + nr_bhs = 0; + + /* Read all cb buffer heads one cluster at a time. */ + rl = NULL; + for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn; + vcn++) { + BOOL is_retry = FALSE; + + if (!rl) { +lock_retry_remap: + down_read(&ni->run_list.lock); + rl = ni->run_list.rl; + } + if (likely(rl != NULL)) { + /* Seek to element containing target vcn. */ + while (rl->length && rl[1].vcn <= vcn) + rl++; + lcn = vcn_to_lcn(rl, vcn); + } else + lcn = (LCN)LCN_RL_NOT_MAPPED; + ntfs_debug("Reading vcn = 0x%Lx, lcn = 0x%Lx.", + (long long)vcn, (long long)lcn); + if (lcn < 0) { + /* + * When we reach the first sparse cluster we have + * finished with the cb. + */ + if (lcn == LCN_HOLE) + break; + if (is_retry || lcn != LCN_RL_NOT_MAPPED) + goto rl_err; + is_retry = TRUE; + /* + * Attempt to map run list, dropping lock for the + * duration. + */ + up_read(&ni->run_list.lock); + if (!map_run_list(ni, vcn)) + goto lock_retry_remap; + goto map_rl_err; + } + block = lcn << vol->cluster_size_bits >> block_size_bits; + /* Read the lcn from device in chunks of block_size bytes. 
*/ + max_block = block + (vol->cluster_size >> block_size_bits); + do { + ntfs_debug("block = 0x%x.", block); + if (unlikely(!(bhs[nr_bhs] = getblk(dev, block, + block_size)))) + goto getblk_err; + nr_bhs++; + } while (++block < max_block); + } + + /* Release the lock if we took it. */ + if (rl) + up_read(&ni->run_list.lock); + + /* Setup and initiate io on all buffer heads. */ + for (i = 0; i < nr_bhs; i++) { + struct buffer_head *tbh = bhs[i]; + + if (unlikely(test_set_buffer_locked(tbh))) + continue; + if (unlikely(buffer_uptodate(tbh))) { + unlock_buffer(tbh); + continue; + } + atomic_inc(&tbh->b_count); + tbh->b_end_io = end_buffer_io_sync; + submit_bh(READ, tbh); + } + + /* Wait for io completion on all buffer heads. */ + for (i = 0; i < nr_bhs; i++) { + struct buffer_head *tbh = bhs[i]; + + if (buffer_uptodate(tbh)) + continue; + wait_on_buffer(tbh); + /* + * We need an optimization barrier here, otherwise we start + * hitting the below fixup code when accessing a loopback + * mounted ntfs partition. This indicates either there is a + * race condition in the loop driver or, more likely, gcc + * overoptimises the code without the barrier and it doesn't + * do the Right Thing(TM). + */ + barrier(); + if (unlikely(!buffer_uptodate(tbh))) { + ntfs_warning(vol->sb, "Buffer is unlocked but not " + "uptodate! Unplugging the disk queue " + "and rescheduling."); + get_bh(tbh); + run_task_queue(&tq_disk); + schedule(); + put_bh(tbh); + if (unlikely(!buffer_uptodate(tbh))) + goto read_err; + ntfs_warning(vol->sb, "Buffer is now uptodate. Good."); + } + } + + /* + * Get the compression buffer. We must not sleep any more + * until we are finished with it. + */ + spin_lock(&ntfs_cb_lock); + cb = ntfs_compression_buffer; + + BUG_ON(!cb); + + cb_pos = cb; + cb_end = cb + cb_size; + + /* Copy the buffer heads into the contiguous buffer. */ + for (i = 0; i < nr_bhs; i++) { + memcpy(cb_pos, bhs[i]->b_data, block_size); + cb_pos += block_size; + } + + /* Just a precaution. 
*/ + if (cb_pos + 2 <= cb + cb_size) + *(u16*)cb_pos = 0; + + /* Reset cb_pos back to the beginning. */ + cb_pos = cb; + + /* We now have both source (if present) and destination. */ + ntfs_debug("Successfully read the compression block."); + + /* The last page and maximum offset within it for the current cb. */ + cb_max_page = (cur_page << PAGE_CACHE_SHIFT) + cur_ofs + cb_size; + cb_max_ofs = cb_max_page & ~PAGE_CACHE_MASK; + cb_max_page >>= PAGE_CACHE_SHIFT; + + /* Catch end of file inside a compression block. */ + if (cb_max_page > max_page) + cb_max_page = max_page; + + if (vcn == start_vcn - cb_clusters) { + /* Sparse cb, zero out page range overlapping the cb. */ + ntfs_debug("Found sparse compression block."); + /* We can sleep from now on, so we drop lock. */ + spin_unlock(&ntfs_cb_lock); + if (cb_max_ofs) + cb_max_page--; + for (; cur_page < cb_max_page; cur_page++) { + page = pages[cur_page]; + if (page) { + /* + * FIXME: Using clear_page() will become wrong + * when we get PAGE_CACHE_SIZE != PAGE_SIZE but + * for now there is no problem. + */ + if (likely(!cur_ofs)) + clear_page(page_address(page)); + else + memset(page_address(page) + cur_ofs, 0, + PAGE_CACHE_SIZE - + cur_ofs); + flush_dcache_page(page); + kunmap(page); + SetPageUptodate(page); + unlock_page(page); + if (cur_page == xpage) + xpage_done = 1; + else + page_cache_release(page); + pages[cur_page] = NULL; + } + cb_pos += PAGE_CACHE_SIZE - cur_ofs; + cur_ofs = 0; + if (cb_pos >= cb_end) + break; + } + /* If we have a partial final page, deal with it now. */ + if (cb_max_ofs && cb_pos < cb_end) { + page = pages[cur_page]; + if (page) + memset(page_address(page) + cur_ofs, 0, + cb_max_ofs - cur_ofs); + /* + * No need to update cb_pos at this stage: + * cb_pos += cb_max_ofs - cur_ofs; + */ + cur_ofs = cb_max_ofs; + } + } else if (vcn == start_vcn) { + /* We can't sleep so we need two stages. 
*/ + unsigned int cur2_page = cur_page; + unsigned int cur_ofs2 = cur_ofs; + u8 *cb_pos2 = cb_pos; + + ntfs_debug("Found uncompressed compression block."); + /* Uncompressed cb, copy it to the destination pages. */ + /* + * TODO: As a big optimization, we could detect this case + * before we read all the pages and use block_read_full_page() + * on all full pages instead (we still have to treat partial + * pages specially but at least we are getting rid of the + * synchronous io for the majority of pages). + * Or if we choose not to do the read-ahead/-behind stuff, we + * could just return block_read_full_page(pages[xpage]) as long + * as PAGE_CACHE_SIZE <= cb_size. + */ + if (cb_max_ofs) + cb_max_page--; + /* First stage: copy data into destination pages. */ + for (; cur_page < cb_max_page; cur_page++) { + page = pages[cur_page]; + if (page) + memcpy(page_address(page) + cur_ofs, cb_pos, + PAGE_CACHE_SIZE - cur_ofs); + cb_pos += PAGE_CACHE_SIZE - cur_ofs; + cur_ofs = 0; + if (cb_pos >= cb_end) + break; + } + /* If we have a partial final page, deal with it now. */ + if (cb_max_ofs && cb_pos < cb_end) { + page = pages[cur_page]; + if (page) + memcpy(page_address(page) + cur_ofs, cb_pos, + cb_max_ofs - cur_ofs); + cb_pos += cb_max_ofs - cur_ofs; + cur_ofs = cb_max_ofs; + } + /* We can sleep from now on, so drop lock. */ + spin_unlock(&ntfs_cb_lock); + /* Second stage: finalize pages. */ + for (; cur2_page < cb_max_page; cur2_page++) { + page = pages[cur2_page]; + if (page) { + /* + * If we are outside the initialized size, zero + * the out of bounds page range. 
+ */ + handle_bounds_compressed_page(ni, page); + flush_dcache_page(page); + kunmap(page); + SetPageUptodate(page); + unlock_page(page); + if (cur2_page == xpage) + xpage_done = 1; + else + page_cache_release(page); + pages[cur2_page] = NULL; + } + cb_pos2 += PAGE_CACHE_SIZE - cur_ofs2; + cur_ofs2 = 0; + if (cb_pos2 >= cb_end) + break; + } + } else { + /* Compressed cb, decompress it into the destination page(s). */ + unsigned int prev_cur_page = cur_page; + + ntfs_debug("Found compressed compression block."); + err = ntfs_decompress(pages, &cur_page, &cur_ofs, + cb_max_page, cb_max_ofs, xpage, &xpage_done, + cb_pos, cb_size - (cb_pos - cb)); + /* + * We can sleep from now on, lock already dropped by + * ntfs_decompress(). + */ + if (err) { + ntfs_error(vol->sb, "ntfs_decompress() failed in inode " + "0x%lx with error code %i. Skipping " + "this compression block.\n", + ni->mft_no, -err); + /* Release the unfinished pages. */ + for (; prev_cur_page < cur_page; prev_cur_page++) { + page = pages[prev_cur_page]; + if (page) { + if (prev_cur_page == xpage && + !xpage_done) + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + if (prev_cur_page != xpage) + page_cache_release(page); + pages[prev_cur_page] = NULL; + } + } + } + } + + /* Release the buffer heads of the compression block just handled. */ + for (i = 0; i < nr_bhs; i++) + brelse(bhs[i]); + + /* Do we have more work to do? */ + if (nr_cbs) + goto do_next_cb; + + /* We no longer need the list of buffer heads. */ + kfree(bhs); + + /* Clean up if we have any pages left. Should never happen. */ + for (cur_page = 0; cur_page < max_page; cur_page++) { + page = pages[cur_page]; + if (page) { + ntfs_error(vol->sb, "Still have pages left! 
" + "Terminating them with extreme " + "prejudice."); + if (cur_page == xpage && !xpage_done) + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + if (cur_page != xpage) + page_cache_release(page); + pages[cur_page] = NULL; + } + } + + /* We no longer need the list of pages. */ + kfree(pages); + + /* If we have completed the requested page, we return success. */ + if (likely(xpage_done)) + return 0; + + ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ? + "EOVERFLOW" : (!err ? "EIO" : "unkown error")); + return err < 0 ? err : -EIO; + +read_err: + ntfs_error(vol->sb, "IO error while reading compressed data."); + goto err_out; + +map_rl_err: + ntfs_error(vol->sb, "map_run_list() failed. Cannot read compression " + "block."); + goto err_out; + +rl_err: + up_read(&ni->run_list.lock); + ntfs_error(vol->sb, "vcn_to_lcn() failed. Cannot read compression " + "block."); + goto err_out; + +getblk_err: + up_read(&ni->run_list.lock); + ntfs_error(vol->sb, "getblk() failed. Cannot read compression block."); + +err_out: + /* Release the buffer heads still held by whichever path failed. */ + for (i = 0; i < nr_bhs; i++) + brelse(bhs[i]); + kfree(bhs); + for (i = cur_page; i < max_page; i++) { + page = pages[i]; + if (page) { + if (i == xpage && !xpage_done) + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + if (i != xpage) + page_cache_release(page); + } + } + kfree(pages); + return -EIO; +} + diff -urN linux-2.4.24-vanilla/fs/ntfs/debug.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/debug.c --- linux-2.4.24-vanilla/fs/ntfs/debug.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/debug.c 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,180 @@ +/* + * debug.c - NTFS kernel debug support. Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. 
+ * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#ifdef CONFIG_PREEMPT +# include +#endif + +#include "debug.h" + +/* + * A static buffer to hold the error string being displayed and a spinlock + * to protect concurrent accesses to it. + */ +static char err_buf[1024]; +static spinlock_t err_buf_lock = SPIN_LOCK_UNLOCKED; + +/** + * __ntfs_warning - output a warning to the syslog + * @function: name of function outputting the warning + * @sb: super block of mounted ntfs filesystem + * @fmt: warning string containing format specifications + * @...: a variable number of arguments specified in @fmt + * + * Outputs a warning to the syslog for the mounted ntfs filesystem described + * by @sb. + * + * @fmt and the corresponding @... is printf style format string containing + * the warning string and the corresponding format arguments, respectively. + * + * @function is the name of the function from which __ntfs_warning is being + * called. + * + * Note, you should be using debug.h::ntfs_warning(@sb, @fmt, @...) instead + * as this provides the @function parameter automatically. + */ +void __ntfs_warning(const char *function, const struct super_block *sb, + const char *fmt, ...) 
+{ + va_list args; + int flen = 0; + + if (function) + flen = strlen(function); + spin_lock(&err_buf_lock); + va_start(args, fmt); + vsnprintf(err_buf, sizeof(err_buf), fmt, args); + va_end(args); + if (sb) + printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n", + kdevname(sb->s_dev), flen ? function : "", err_buf); + else + printk(KERN_ERR "NTFS-fs warning: %s(): %s\n", + flen ? function : "", err_buf); + spin_unlock(&err_buf_lock); +} + +/** + * __ntfs_error - output an error to the syslog + * @function: name of function outputting the error + * @sb: super block of mounted ntfs filesystem + * @fmt: error string containing format specifications + * @...: a variable number of arguments specified in @fmt + * + * Outputs an error to the syslog for the mounted ntfs filesystem described + * by @sb. + * + * @fmt and the corresponding @... is printf style format string containing + * the error string and the corresponding format arguments, respectively. + * + * @function is the name of the function from which __ntfs_error is being + * called. + * + * Note, you should be using debug.h::ntfs_error(@sb, @fmt, @...) instead + * as this provides the @function parameter automatically. + */ +void __ntfs_error(const char *function, const struct super_block *sb, + const char *fmt, ...) +{ + va_list args; + int flen = 0; + + if (function) + flen = strlen(function); + spin_lock(&err_buf_lock); + va_start(args, fmt); + vsnprintf(err_buf, sizeof(err_buf), fmt, args); + va_end(args); + if (sb) + printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n", + kdevname(sb->s_dev), flen ? function : "", err_buf); + else + printk(KERN_ERR "NTFS-fs error: %s(): %s\n", + flen ? function : "", err_buf); + spin_unlock(&err_buf_lock); +} + +#ifdef DEBUG + +/* If 1, output debug messages, and if 0, don't. */ +int debug_msgs = 0; + +void __ntfs_debug (const char *file, int line, const char *function, + const char *fmt, ...) 
+{ + va_list args; + int flen = 0; + + if (!debug_msgs) + return; + if (function) + flen = strlen(function); + spin_lock(&err_buf_lock); + va_start(args, fmt); + vsnprintf(err_buf, sizeof(err_buf), fmt, args); + va_end(args); + printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s: %s\n", + file, line, flen ? function : "", err_buf); + spin_unlock(&err_buf_lock); +} + +/* Dump a run list. Caller has to provide synchronisation for @rl. */ +void ntfs_debug_dump_runlist(const run_list_element *rl) +{ + int i; + const char *lcn_str[5] = { "LCN_HOLE ", "LCN_RL_NOT_MAPPED", + "LCN_ENOENT ", "LCN_EINVAL ", + "LCN_unknown " }; + + if (!debug_msgs) + return; + printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping run list (values " + "in hex):\n"); + if (!rl) { + printk(KERN_DEBUG "Run list not present.\n"); + return; + } + printk(KERN_DEBUG "VCN LCN Run length\n"); + for (i = 0; ; i++) { + LCN lcn = (rl + i)->lcn; + + if (lcn < (LCN)0) { + int index = -lcn - 1; + + if (index > -LCN_EINVAL - 1) + index = 4; + printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", + (rl + i)->vcn, lcn_str[index], + (rl + i)->length, (rl + i)->length ? + "" : " (run list end)"); + } else + printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", + (rl + i)->vcn, (rl + i)->lcn, + (rl + i)->length, (rl + i)->length ? + "" : " (run list end)"); + if (!(rl + i)->length) + break; + } +} + +#endif + diff -urN linux-2.4.24-vanilla/fs/ntfs/debug.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/debug.h --- linux-2.4.24-vanilla/fs/ntfs/debug.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/debug.h 2004-01-21 14:31:43.000000000 +0000 @@ -0,0 +1,72 @@ +/* + * debug.h - NTFS kernel debug support. Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. 
+ * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_DEBUG_H +#define _LINUX_NTFS_DEBUG_H + +#include +#include +#include +#include + +#include "inode.h" +#include "attrib.h" + +#ifdef DEBUG + +extern int debug_msgs; + +#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +/** + * ntfs_debug - write a debug level message to syslog + * @f: a printf format string containing the message + * @...: the variables to substitute into @f + * + * ntfs_debug() writes a DEBUG level message to the syslog but only if the + * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP. + */ +static void ntfs_debug(const char *f, ...); +#endif + +extern void __ntfs_debug (const char *file, int line, const char *function, + const char *format, ...) __attribute__ ((format (printf, 4, 5))); +#define ntfs_debug(f, a...) \ + __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) + +extern void ntfs_debug_dump_runlist(const run_list_element *rl); + +#else /* !DEBUG */ + +#define ntfs_debug(f, a...) 
do {} while (0) +#define ntfs_debug_dump_runlist(rl) do {} while (0) + +#endif /* !DEBUG */ + +extern void __ntfs_warning(const char *function, const struct super_block *sb, + const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); +#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) + +extern void __ntfs_error(const char *function, const struct super_block *sb, + const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); +#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) + +#endif /* _LINUX_NTFS_DEBUG_H */ + diff -urN linux-2.4.24-vanilla/fs/ntfs/dir.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/dir.c --- linux-2.4.24-vanilla/fs/ntfs/dir.c 2003-11-28 18:26:21.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/dir.c 2004-01-21 14:28:25.000000000 +0000 @@ -1,1103 +1,1419 @@ -/* - * dir.c +/** + * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * Copyright (C) 1995-1997, 1999 Martin von Löwis - * Copyright (C) 1999 Steve Dodd - * Copyright (C) 1999 Joseph Malicki - * Copyright (C) 2001 Anton Altaparmakov (AIA) + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "ntfstypes.h" -#include "struct.h" -#include "dir.h" -#include "macros.h" - -#include -#include "super.h" -#include "inode.h" -#include "attr.h" -#include "support.h" -#include "util.h" +#include #include -#include - -static char I30[] = "$I30"; - -/* An index record should start with INDX, and the last word in each block - * should contain the check value. If it passes, the original values need to - * be restored. */ -int ntfs_check_index_record(ntfs_inode *ino, char *record) -{ - return ntfs_fixup_record(record, "INDX", ino->u.index.recordsize); -} - -static inline int ntfs_is_top(ntfs_u64 stack) -{ - return stack == 14; -} - -static int ntfs_pop(ntfs_u64 *stack) -{ - static int width[16] = {1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,-1}; - int res = -1; - - switch (width[*stack & 15]) { - case 1: - res = (int)((*stack & 15) >> 1); - *stack >>= 4; - break; - case 2: - res = (int)(((*stack & 63) >> 2) + 7); - *stack >>= 6; - break; - case 3: - res = (int)(((*stack & 255) >> 3) + 23); - *stack >>= 8; - break; - case 4: - res = (int)(((*stack & 1023) >> 4) + 55); - *stack >>= 10; - break; - default: - ntfs_error("Unknown encoding\n"); - } - return res; -} - -static inline unsigned int ntfs_top(void) -{ - return 14; -} - -static ntfs_u64 ntfs_push(ntfs_u64 stack, int i) -{ - if (i < 7) - return (stack << 4) | (i << 1); - if (i < 23) - return (stack << 6) | ((i - 7) << 2) | 1; - if (i < 55) - return (stack << 8) | ((i - 23) << 3) | 3; - if (i < 120) - return (stack << 10) | ((i - 55) << 4) | 7; - ntfs_error("Too many entries\n"); - return ~((ntfs_u64)0); -} +#include "ntfs.h" +#include "dir.h" -#if 0 -static void ntfs_display_stack(ntfs_u64 stack) +/** + * The little endian Unicode 
string $I30 as a global constant. + */ +uchar_t I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), + const_cpu_to_le16('3'), const_cpu_to_le16('0'), + const_cpu_to_le16(0) }; + +/** + * ntfs_lookup_inode_by_name - find an inode in a directory given its name + * @dir_ni: ntfs inode of the directory in which to search for the name + * @uname: Unicode name for which to search in the directory + * @uname_len: length of the name @uname in Unicode characters + * @res: return the found file name if necessary (see below) + * + * Look for an inode with name @uname in the directory with inode @dir_ni. + * ntfs_lookup_inode_by_name() walks the contents of the directory looking for + * the Unicode name. If the name is found in the directory, the corresponding + * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it + * is a 64-bit number containing the sequence number. + * + * On error, a negative value is returned corresponding to the error code. In + * particular if the inode is not found -ENOENT is returned. Note that you + * can't just check the return value for being negative, you have to check the + * inode number for being negative which you can extract using MREC(return + * value). + * + * Note, @uname_len does not include the (optional) terminating NULL character. + * + * Note, we look for a case sensitive match first but we also look for a case + * insensitive match at the same time. If we find a case insensitive match, we + * save that for the case that we don't find an exact match, where we return + * the case insensitive match and setup @res (which we allocate!) with the mft + * reference, the file name type, length and with a copy of the little endian + * Unicode file name itself. If we match a file name which is in the DOS name + * space, we only return the mft reference and file name type in @res. + * ntfs_lookup() then uses this to find the long file name in the inode itself. 
+ * This is to avoid polluting the dcache with short file names. We want them to + * work but we don't care for how quickly one can access them. This also fixes + * the dcache aliasing issues. + */ +MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname, + const int uname_len, ntfs_name **res) { - while(!ntfs_is_top(stack)) - { - printf("%d ", ntfs_pop(&stack)); + ntfs_volume *vol = dir_ni->vol; + struct super_block *sb = vol->sb; + MFT_RECORD *m; + INDEX_ROOT *ir; + INDEX_ENTRY *ie; + INDEX_ALLOCATION *ia; + u8 *index_end; + u64 mref; + attr_search_context *ctx; + int err, rc; + VCN vcn, old_vcn; + struct address_space *ia_mapping; + struct page *page; + u8 *kaddr; + ntfs_name *name = NULL; + + /* Get hold of the mft record for the directory. */ + m = map_mft_record(dir_ni); + if (unlikely(IS_ERR(m))) { + ntfs_error(sb, "map_mft_record() failed with error code %ld.", + -PTR_ERR(m)); + return ERR_MREF(PTR_ERR(m)); + } + ctx = get_attr_search_ctx(dir_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + /* Find the index root attribute in the mft record. */ + if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0, + ctx)) { + ntfs_error(sb, "Index root attribute missing in directory " + "inode 0x%lx.", dir_ni->mft_no); + err = -EIO; + goto err_out; } - printf("\n"); -} -#endif - -/* True if the entry points to another block of entries. */ -static inline int ntfs_entry_has_subnodes(char *entry) -{ - return (NTFS_GETU16(entry + 0xc) & 1); -} - -/* True if it is not the 'end of dir' entry. */ -static inline int ntfs_entry_is_used(char *entry) -{ - return !(NTFS_GETU16(entry + 0xc) & 2); -} + /* Get to the index root value (it's been verified in read_inode). */ + ir = (INDEX_ROOT*)((u8*)ctx->attr + + le16_to_cpu(ctx->attr->data.resident.value_offset)); + index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); + /* The first index entry. 
*/ + ie = (INDEX_ENTRY*)((u8*)&ir->index + + le32_to_cpu(ir->index.entries_offset)); + /* + * Loop until we exceed valid memory (corruption case) or until we + * reach the last entry. + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { + /* Bounds checks. */ + if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->key_length) > + index_end) + goto dir_err_out; + /* + * The last entry cannot contain a name. It can however contain + * a pointer to a child node in the B+tree so we just break out. + */ + if (ie->flags & INDEX_ENTRY_END) + break; + /* + * We perform a case sensitive comparison and if that matches + * we are done and return the mft reference of the inode (i.e. + * the inode number together with the sequence number for + * consistency checking). We convert it to cpu format before + * returning. + */ + if (ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, + CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { +found_it: + /* + * We have a perfect match, so we don't need to care + * about having matched imperfectly before, so we can + * free name and set *res to NULL. + * However, if the perfect match is a short file name, + * we need to signal this through *res, so that + * ntfs_lookup() can fix dcache aliasing issues. + * As an optimization we just reuse an existing + * allocation of *res. 
+ */ + if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { + if (!name) { + name = kmalloc(sizeof(ntfs_name), + GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto err_out; + } + } + name->mref = le64_to_cpu( + ie->data.dir.indexed_file); + name->type = FILE_NAME_DOS; + name->len = 0; + *res = name; + } else { + if (name) + kfree(name); + *res = NULL; + } + mref = le64_to_cpu(ie->data.dir.indexed_file); + put_attr_search_ctx(ctx); + unmap_mft_record(dir_ni); + return mref; + } + /* + * For a case insensitive mount, we also perform a case + * insensitive comparison (provided the file name is not in the + * POSIX namespace). If the comparison matches, and the name is + * in the WIN32 namespace, we cache the filename in *res so + * that the caller, ntfs_lookup(), can work on it. If the + * comparison matches, and the name is in the DOS namespace, we + * only cache the mft reference and the file name type (we set + * the name length to zero for simplicity). + */ + if (!NVolCaseSensitive(vol) && + ie->key.file_name.file_name_type && + ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, + IGNORE_CASE, vol->upcase, vol->upcase_len)) { + int name_size = sizeof(ntfs_name); + u8 type = ie->key.file_name.file_name_type; + u8 len = ie->key.file_name.file_name_length; + + /* Only one case insensitive matching name allowed. */ + if (name) { + ntfs_error(sb, "Found already allocated name " + "in phase 1. Please run chkdsk " + "and if that doesn't find any " + "errors please report you saw " + "this message to " + "linux-ntfs-dev@lists.sf.net."); + goto dir_err_out; + } -/* - * Removed RACE for allocating index blocks. But stil not too happy. - * There might be more races afterwards. 
(AIA) - */ -static int ntfs_allocate_index_block(ntfs_iterate_s *walk) -{ - ntfs_attribute *allocation, *bitmap = 0; - int error, size, i, bit; - ntfs_u8 *bmap; - ntfs_io io; - ntfs_volume *vol = walk->dir->vol; - - /* Check for allocation attribute. */ - allocation = ntfs_find_attr(walk->dir, vol->at_index_allocation, I30); - if (!allocation) { - ntfs_u8 bmp[8]; - /* Create index allocation attribute. */ - error = ntfs_create_attr(walk->dir, vol->at_index_allocation, - I30, 0, 0, &allocation); - if (error) - goto err_ret; - ntfs_bzero(bmp, sizeof(bmp)); - error = ntfs_create_attr(walk->dir, vol->at_bitmap, I30, bmp, - sizeof(bmp), &bitmap); - if (error) - goto err_ret; - } else - bitmap = ntfs_find_attr(walk->dir, vol->at_bitmap, I30); - if (!bitmap) { - ntfs_error("Directory w/o bitmap\n"); - error = -EINVAL; - goto err_ret; - } - size = bitmap->size; - bmap = ntfs_malloc(size); - if (!bmap) { - error = -ENOMEM; - goto err_ret; - } - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; -try_again: - io.param = bmap; - io.size = size; - error = ntfs_read_attr(walk->dir, vol->at_bitmap, I30, 0, &io); - if (error || (io.size != size && (error = -EIO, 1))) - goto err_fb_out; - /* Allocate a bit. */ - for (bit = i = 0; i < size; i++) { - if (bmap[i] == 0xFF) + if (type != FILE_NAME_DOS) + name_size += len * sizeof(uchar_t); + name = kmalloc(name_size, GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto err_out; + } + name->mref = le64_to_cpu(ie->data.dir.indexed_file); + name->type = type; + if (type != FILE_NAME_DOS) { + name->len = len; + memcpy(name->name, ie->key.file_name.file_name, + len * sizeof(uchar_t)); + } else + name->len = 0; + *res = name; + } + /* + * Not a perfect match, need to do full blown collation so we + * know which way in the B+tree we have to go. 
+ */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + IGNORE_CASE, vol->upcase, vol->upcase_len); + /* + * If uname collates before the name of the current entry, there + * is definitely no such name in this index but we might need to + * descend into the B+tree so we just break out of the loop. + */ + if (rc == -1) + break; + /* The names are not equal, continue the search. */ + if (rc) continue; - bit = ffz(bmap[i]); - if (bit < 8) + /* + * Names match with case insensitive comparison, now try the + * case sensitive comparison, which is required for proper + * collation. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); + if (rc == -1) break; + if (rc) + continue; + /* + * Perfect match, this will never happen as the + * ntfs_are_names_equal() call will have gotten a match but we + * still treat it correctly. + */ + goto found_it; } - if (i >= size) { - /* FIXME: Extend bitmap. */ - error = -EOPNOTSUPP; - goto err_fb_out; - } - /* Get the byte containing our bit again, now taking the BKL. */ - io.param = bmap; - io.size = 1; - lock_kernel(); - error = ntfs_read_attr(walk->dir, vol->at_bitmap, I30, i, &io); - if (error || (io.size != 1 && (error = -EIO, 1))) - goto err_unl_out; - if (ntfs_test_and_set_bit(bmap, bit)) { - unlock_kernel(); - /* Give other process(es) a chance to finish. */ - schedule(); - goto try_again; - } - walk->newblock = (i * 8 + bit) * walk->dir->u.index.clusters_per_record; - io.param = bmap; - error = ntfs_write_attr(walk->dir, vol->at_bitmap, I30, i, &io); - if (error || (io.size != size && (error = -EIO, 1))) - goto err_unl_out; - /* Change inode on disk, required when bitmap is resident. 
*/ - error = ntfs_update_inode(walk->dir); - if (error) - goto err_unl_out; - unlock_kernel(); - ntfs_free(bmap); - /* Check whether record is out of allocated range. */ - size = allocation->size; - if (walk->newblock * vol->cluster_size >= size) { - /* Build index record. */ - int hsize; - int s1 = walk->dir->u.index.recordsize; - int nr_fix = (s1 >> vol->sector_size) + 1; - char *record = ntfs_malloc(s1); - if (!record) { - error = -ENOMEM; - goto err_ret; - } - ntfs_bzero(record, s1); - /* Magic */ - ntfs_memcpy(record, "INDX", 4); - /* Offset to fixups */ - NTFS_PUTU16(record + 4, 0x28); - /* Number of fixups. */ - NTFS_PUTU16(record + 6, nr_fix); - /* Log file sequence number - We don't do journalling so we - * just set it to zero which should be the Right Thing. (AIA) */ - NTFS_PUTU64(record + 8, 0); - /* VCN of buffer */ - NTFS_PUTU64(record + 0x10, walk->newblock); - /* Header size. */ - hsize = 0x10 + 2 * nr_fix; - hsize = (hsize + 7) & ~7; /* Align. */ - NTFS_PUTU16(record + 0x18, hsize); - /* Total size of record. */ - NTFS_PUTU32(record + 0x20, s1 - 0x18); - /* Writing the data will extend the attribute. */ - io.param = record; - io.size = s1; - io.do_read = 0; - error = ntfs_readwrite_attr(walk->dir, allocation, size, &io); - ntfs_free(record); - if (error || (io.size != s1 && (error = -EIO, 1))) - goto err_ret; - error = ntfs_update_inode(walk->dir); - if (error) - goto err_ret; - } - return 0; -err_unl_out: - unlock_kernel(); -err_fb_out: - ntfs_free(bmap); -err_ret: - return error; -} - -/* Write an index block (root or allocation) back to storage. - * Used is the total number of bytes in buf, including all headers. */ -static int ntfs_index_writeback(ntfs_iterate_s *walk, ntfs_u8 *buf, int block, - int used) -{ - ntfs_io io; - int error; - ntfs_attribute *a; - ntfs_volume *vol = walk->dir->vol; - - io.fn_put = 0; - io.fn_get = ntfs_get; - io.param = buf; - if (block == -1) { /* Index root. 
*/ - NTFS_PUTU16(buf + 0x14, used - 0x10); - /* 0x18 is a copy thereof. */ - NTFS_PUTU16(buf + 0x18, used - 0x10); - io.size = used; - error = ntfs_write_attr(walk->dir, vol->at_index_root, I30, 0, - &io); - if (error || (io.size != used && (error = -EIO, 1))) - return error; - /* Shrink if necessary. */ - a = ntfs_find_attr(walk->dir, vol->at_index_root, I30); - ntfs_resize_attr(walk->dir, a, used); - } else { - NTFS_PUTU16(buf + 0x1C, used - 0x18); - io.size = walk->dir->u.index.recordsize; - error = ntfs_insert_fixups(buf, io.size); - if (error) { - printk(KERN_ALERT "NTFS: ntfs_index_writeback() caught " - "corrupt index record ntfs record " - "header. Refusing to write corrupt " - "data to disk. Unmount and run chkdsk " - "immediately!\n"); - return -EIO; + /* + * We have finished with this index without success. Check for the + * presence of a child node and if not present return -ENOENT, unless + * we have got a matching name cached in name in which case return the + * mft reference associated with it. + */ + if (!(ie->flags & INDEX_ENTRY_NODE)) { + if (name) { + put_attr_search_ctx(ctx); + unmap_mft_record(dir_ni); + return name->mref; } - error = ntfs_write_attr(walk->dir, vol->at_index_allocation, - I30, (__s64)block << vol->cluster_size_bits, - &io); - if (error || (io.size != walk->dir->u.index.recordsize && - (error = -EIO, 1))) - return error; + ntfs_debug("Entry not found."); + err = -ENOENT; + goto err_out; + } /* Child node present, descend into it. */ + /* Consistency check: Verify that an index allocation exists. */ + if (!NInoIndexAllocPresent(dir_ni)) { + ntfs_error(sb, "No index allocation attribute but index entry " + "requires one. 
Directory inode 0x%lx is " + "corrupt or driver bug.", dir_ni->mft_no); + err = -EIO; + goto err_out; } - return 0; -} - -static int ntfs_split_record(ntfs_iterate_s *walk, char *start, int bsize, - int usize) -{ - char *entry, *prev; - ntfs_u8 *newbuf = 0, *middle = 0; - int error, othersize, mlen; - ntfs_io io; - ntfs_volume *vol = walk->dir->vol; - int oldblock; - - error = ntfs_allocate_index_block(walk); - if (error) - return error; - /* This should not happen. */ - if (walk->block == -1) { - ntfs_error("Trying to split root"); - return -EOPNOTSUPP; - } - entry = start + NTFS_GETU16(start + 0x18) + 0x18; - for (prev = entry; entry - start < usize / 2; - entry += NTFS_GETU16(entry + 8)) - prev = entry; - newbuf = ntfs_malloc(vol->index_record_size); - if (!newbuf) - return -ENOMEM; - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - io.param = newbuf; - io.size = vol->index_record_size; - /* Read in old header. FIXME: Reading everything is overkill. */ - error = ntfs_read_attr(walk->dir, vol->at_index_allocation, I30, - (__s64)walk->newblock << vol->cluster_size_bits, &io); - if (error) - goto out; - if (io.size != vol->index_record_size) { - error = -EIO; - goto out; - } - /* FIXME: Adjust header. */ - /* Copy everything from entry to new block. */ - othersize = usize - (entry - start); - ntfs_memcpy(newbuf + NTFS_GETU16(newbuf + 0x18) + 0x18, entry, - othersize); - /* Copy flags. */ - NTFS_PUTU32(newbuf + 0x24, NTFS_GETU32(start + 0x24)); - error = ntfs_index_writeback(walk, newbuf, walk->newblock, - othersize + NTFS_GETU16(newbuf + 0x18) + 0x18); - if (error) - goto out; - /* Move prev to walk. */ - mlen = NTFS_GETU16(prev + 0x8); - /* Remember old child node. */ - if (ntfs_entry_has_subnodes(prev)) - oldblock = NTFS_GETU32(prev + mlen - 8); - else - oldblock = -1; - /* Allow for pointer to subnode. */ - middle = ntfs_malloc(ntfs_entry_has_subnodes(prev) ? 
mlen : mlen + 8); - if (!middle){ - error = -ENOMEM; - goto out; - } - ntfs_memcpy(middle, prev, mlen); - /* Set has_subnodes flag. */ - NTFS_PUTU8(middle + 0xC, NTFS_GETU8(middle + 0xC) | 1); - /* Middle entry points to block, parent entry will point to newblock. */ - NTFS_PUTU64(middle + mlen - 8, walk->block); - if (walk->new_entry) - ntfs_error("Entry not reset"); - walk->new_entry = middle; - walk->u.flags |= ITERATE_SPLIT_DONE; - /* Terminate old block. */ - othersize = usize - (prev-start); - NTFS_PUTU64(prev, 0); - if (oldblock == -1) { - NTFS_PUTU32(prev + 8, 0x10); - NTFS_PUTU32(prev + 0xC, 2); - othersize += 0x10; - } else { - NTFS_PUTU32(prev + 8, 0x18); - NTFS_PUTU32(prev + 0xC, 3); - NTFS_PUTU64(prev + 0x10, oldblock); - othersize += 0x18; - } - /* Write back original block. */ - error = ntfs_index_writeback(walk, start, walk->block, othersize); - out: - if (newbuf) - ntfs_free(newbuf); - if (middle) - ntfs_free(middle); - return error; -} - -static int ntfs_dir_insert(ntfs_iterate_s *walk, char *start, char* entry) -{ - int blocksize, usedsize, error, offset; - int do_split = 0; - offset = entry - start; - if (walk->block == -1) { /* index root */ - blocksize = walk->dir->vol->mft_record_size; - usedsize = NTFS_GETU16(start + 0x14) + 0x10; - } else { - blocksize = walk->dir->u.index.recordsize; - usedsize = NTFS_GETU16(start + 0x1C) + 0x18; - } - if (usedsize + walk->new_entry_size > blocksize) { - char* s1 = ntfs_malloc(blocksize + walk->new_entry_size); - if (!s1) - return -ENOMEM; - ntfs_memcpy(s1, start, usedsize); - do_split = 1; - /* Adjust entry to s1. 
*/ - entry = s1 + (entry - start); - start = s1; - } - ntfs_memmove(entry + walk->new_entry_size, entry, usedsize - offset); - ntfs_memcpy(entry, walk->new_entry, walk->new_entry_size); - usedsize += walk->new_entry_size; - ntfs_free(walk->new_entry); - walk->new_entry = 0; - if (do_split) { - error = ntfs_split_record(walk, start, blocksize, usedsize); - ntfs_free(start); - } else { - error = ntfs_index_writeback(walk, start, walk->block,usedsize); - if (error) - return error; + /* Get the starting vcn of the index_block holding the child node. */ + vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8); + ia_mapping = VFS_I(dir_ni)->i_mapping; + /* + * We are done with the index root and the mft record. Release them, + * otherwise we deadlock with ntfs_map_page(). + */ + put_attr_search_ctx(ctx); + unmap_mft_record(dir_ni); + m = NULL; + ctx = NULL; +descend_into_child_node: + /* + * Convert vcn to index into the index allocation attribute in units + * of PAGE_CACHE_SIZE and map the page cache page, reading it from + * disk if necessary. + */ + page = ntfs_map_page(ia_mapping, vcn << + dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT); + if (IS_ERR(page)) { + ntfs_error(sb, "Failed to map directory index page, error %ld.", + -PTR_ERR(page)); + err = PTR_ERR(page); + goto err_out; + } + kaddr = (u8*)page_address(page); +fast_descend_into_child_node: + /* Get to the index allocation block. */ + ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << + dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK)); + /* Bounds checks. */ + if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { + ntfs_error(sb, "Out of bounds check failed. Corrupt directory " + "inode 0x%lx or driver bug.", dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - return 0; -} - -/* Try to split INDEX_ROOT attributes. Return -E2BIG if nothing changed. 
*/ -int ntfs_split_indexroot(ntfs_inode *ino) -{ - ntfs_attribute *ra; - ntfs_u8 *root = 0, *index = 0; - ntfs_io io; - int error, off, i, bsize, isize; - ntfs_iterate_s walk; - - ra = ntfs_find_attr(ino, ino->vol->at_index_root, I30); - if (!ra) - return -ENOTDIR; - bsize = ino->vol->mft_record_size; - root = ntfs_malloc(bsize); - if (!root) - return -E2BIG; - io.fn_put = ntfs_put; - io.param = root; - io.size = bsize; - error = ntfs_read_attr(ino, ino->vol->at_index_root, I30, 0, &io); - if (error) - goto out; - off = 0x20; - /* Count number of entries. */ - for (i = 0; ntfs_entry_is_used(root + off); i++) - off += NTFS_GETU16(root + off + 8); - if (i <= 2) { - /* We don't split small index roots. */ - error = -E2BIG; - goto out; - } - index = ntfs_malloc(ino->vol->index_record_size); - if (!index) { - error = -ENOMEM; - goto out; - } - walk.dir = ino; - walk.block = -1; - walk.result = walk.new_entry = 0; - walk.name = 0; - error = ntfs_allocate_index_block(&walk); - if (error) - goto out; - /* Write old root to new index block. */ - io.param = index; - io.size = ino->vol->index_record_size; - error = ntfs_read_attr(ino, ino->vol->at_index_allocation, I30, - (__s64)walk.newblock << ino->vol->cluster_size_bits, &io); - if (error) - goto out; - isize = NTFS_GETU16(root + 0x18) - 0x10; - ntfs_memcpy(index + NTFS_GETU16(index + 0x18) + 0x18, root+0x20, isize); - /* Copy flags. */ - NTFS_PUTU32(index + 0x24, NTFS_GETU32(root + 0x1C)); - error = ntfs_index_writeback(&walk, index, walk.newblock, - isize + NTFS_GETU16(index + 0x18) + 0x18); - if (error) - goto out; - /* Mark root as split. */ - NTFS_PUTU32(root + 0x1C, 1); - /* Truncate index root. */ - NTFS_PUTU64(root + 0x20, 0); - NTFS_PUTU32(root + 0x28, 0x18); - NTFS_PUTU32(root + 0x2C, 3); - NTFS_PUTU64(root + 0x30, walk.newblock); - error = ntfs_index_writeback(&walk, root, -1, 0x38); - out: - ntfs_free(root); - ntfs_free(index); - return error; -} - -/* The entry has been found. 
Copy the result in the caller's buffer */ -static int ntfs_copyresult(char *dest, char *source) -{ - int length = NTFS_GETU16(source + 8); - ntfs_memcpy(dest, source, length); - return 1; -} - -/* Use $UpCase some day. */ -static inline unsigned short ntfs_my_toupper(ntfs_volume *vol, ntfs_u16 x) -{ - /* We should read any pending rest of $UpCase here. */ - if (x >= vol->upcase_length) - return x; - return vol->upcase[x]; -} - -/* Everything passed in walk and entry. */ -static int ntfs_my_strcmp(ntfs_iterate_s *walk, const unsigned char *entry) -{ - int lu = *(entry + 0x50); - int i; - - ntfs_u16* name = (ntfs_u16*)(entry + 0x52); - ntfs_volume *vol = walk->dir->vol; - for (i = 0; i < lu && i < walk->namelen; i++) - if (ntfs_my_toupper(vol, NTFS_GETU16(name + i)) != - ntfs_my_toupper(vol, NTFS_GETU16(walk->name + i))) - break; - if (i == lu && i == walk->namelen) - return 0; - if (i == lu) - return 1; - if (i == walk->namelen) - return -1; - if (ntfs_my_toupper(vol, NTFS_GETU16(name + i)) < - ntfs_my_toupper(vol, NTFS_GETU16(walk->name + i))) - return 1; - return -1; -} - -/* Necessary forward declaration. */ -static int ntfs_getdir_iterate(ntfs_iterate_s *walk, char *start, char *entry); - -/* Parse a block of entries. Load the block, fix it up, and iterate over the - * entries. The block is given as virtual cluster number. */ -static int ntfs_getdir_record(ntfs_iterate_s *walk, int block) -{ - int length = walk->dir->u.index.recordsize; - char *record = (char*)ntfs_malloc(length); - char *offset; - int retval,error; - int oldblock; - ntfs_io io; - - if (!record) - return -ENOMEM; - io.fn_put = ntfs_put; - io.param = record; - io.size = length; - /* Read the block from the index allocation attribute. 
*/ - error = ntfs_read_attr(walk->dir, walk->dir->vol->at_index_allocation, - I30, (__s64)block << walk->dir->vol->cluster_size_bits, &io); - if (error || io.size != length) { - ntfs_error("read failed\n"); - ntfs_free(record); - return 0; + if (sle64_to_cpu(ia->index_block_vcn) != vcn) { + ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is " + "different from expected VCN (0x%Lx). " + "Directory inode 0x%lx is corrupt or driver " + "bug.", + (long long)sle64_to_cpu(ia->index_block_vcn), + (long long)vcn, dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - if (!ntfs_check_index_record(walk->dir, record)) { - ntfs_error("%x is not an index record\n", block); - ntfs_free(record); - return 0; + if (le32_to_cpu(ia->index.allocated_size) + 0x18 != + dir_ni->itype.index.block_size) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%lx has a size (%u) differing from the " + "directory specified size (%u). Directory " + "inode is corrupt or driver bug.", + (long long)vcn, dir_ni->mft_no, + le32_to_cpu(ia->index.allocated_size) + 0x18, + dir_ni->itype.index.block_size); + err = -EIO; + goto unm_err_out; } - offset = record + NTFS_GETU16(record + 0x18) + 0x18; - oldblock = walk->block; - walk->block = block; - retval = ntfs_getdir_iterate(walk, record, offset); - walk->block = oldblock; - ntfs_free(record); - return retval; -} - -/* Go down to the next block of entries. These collate before the current - * entry. */ -static int ntfs_descend(ntfs_iterate_s *walk, ntfs_u8 *start, ntfs_u8 *entry) -{ - int length = NTFS_GETU16(entry + 8); - int nextblock = NTFS_GETU32(entry + length - 8); - int error; - - if (!ntfs_entry_has_subnodes(entry)) { - ntfs_error("illegal ntfs_descend call\n"); - return 0; + index_end = (u8*)ia + dir_ni->itype.index.block_size; + if (index_end > kaddr + PAGE_CACHE_SIZE) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%lx crosses page boundary. Impossible! " + "Cannot access! 
This is probably a bug in the " + "driver.", (long long)vcn, dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - error = ntfs_getdir_record(walk, nextblock); - if (!error && walk->type == DIR_INSERT && - (walk->u.flags & ITERATE_SPLIT_DONE)) { - /* Split has occurred. Adjust entry, insert new_entry. */ - NTFS_PUTU32(entry + length - 8, walk->newblock); - /* Reset flags, as the current block might be split again. */ - walk->u.flags &= ~ITERATE_SPLIT_DONE; - error = ntfs_dir_insert(walk, start, entry); + index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); + if (index_end > (u8*)ia + dir_ni->itype.index.block_size) { + ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory " + "inode 0x%lx exceeds maximum size.", + (long long)vcn, dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - return error; -} - -static int ntfs_getdir_iterate_byposition(ntfs_iterate_s *walk, char* start, - char *entry) -{ - int retval = 0; - int curpos = 0, destpos = 0; - int length; - if (walk->u.pos != 0) { - if (ntfs_is_top(walk->u.pos)) - return 0; - destpos = ntfs_pop(&walk->u.pos); - } - while (1) { - if (walk->u.pos == 0) { - if (ntfs_entry_has_subnodes(entry)) - ntfs_descend(walk, start, entry); - else - walk->u.pos = ntfs_top(); - if (ntfs_is_top(walk->u.pos) && - !ntfs_entry_is_used(entry)) - return 1; - walk->u.pos = ntfs_push(walk->u.pos, curpos); - return 1; + /* The first index entry. */ + ie = (INDEX_ENTRY*)((u8*)&ia->index + + le32_to_cpu(ia->index.entries_offset)); + /* + * Iterate similar to above big loop but applied to index buffer, thus + * loop until we exceed valid memory (corruption case) or until we + * reach the last entry. + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { + /* Bounds check. 
*/ + if ((u8*)ie < (u8*)ia || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->key_length) > + index_end) { + ntfs_error(sb, "Index entry out of bounds in " + "directory inode 0x%lx.", + dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - if (curpos == destpos) { - if (!ntfs_is_top(walk->u.pos) && - ntfs_entry_has_subnodes(entry)) { - retval = ntfs_descend(walk, start, entry); - if (retval) { - walk->u.pos = ntfs_push(walk->u.pos, - curpos); - return retval; + /* + * The last entry cannot contain a name. It can however contain + * a pointer to a child node in the B+tree so we just break out. + */ + if (ie->flags & INDEX_ENTRY_END) + break; + /* + * We perform a case sensitive comparison and if that matches + * we are done and return the mft reference of the inode (i.e. + * the inode number together with the sequence number for + * consistency checking). We convert it to cpu format before + * returning. + */ + if (ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, + CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { +found_it2: + /* + * We have a perfect match, so we don't need to care + * about having matched imperfectly before, so we can + * free name and set *res to NULL. + * However, if the perfect match is a short file name, + * we need to signal this through *res, so that + * ntfs_lookup() can fix dcache aliasing issues. + * As an optimization we just reuse an existing + * allocation of *res. 
+ */ + if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { + if (!name) { + name = kmalloc(sizeof(ntfs_name), + GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto unm_err_out; + } } - if (!ntfs_entry_is_used(entry)) - return 0; - walk->u.pos = 0; - } - if (ntfs_entry_is_used(entry)) { - retval = ntfs_copyresult(walk->result, entry); - walk->u.pos = 0; + name->mref = le64_to_cpu( + ie->data.dir.indexed_file); + name->type = FILE_NAME_DOS; + name->len = 0; + *res = name; } else { - walk->u.pos = ntfs_top(); - return 0; + if (name) + kfree(name); + *res = NULL; + } + mref = le64_to_cpu(ie->data.dir.indexed_file); + ntfs_unmap_page(page); + return mref; + } + /* + * For a case insensitive mount, we also perform a case + * insensitive comparison (provided the file name is not in the + * POSIX namespace). If the comparison matches, and the name is + * in the WIN32 namespace, we cache the filename in *res so + * that the caller, ntfs_lookup(), can work on it. If the + * comparison matches, and the name is in the DOS namespace, we + * only cache the mft reference and the file name type (we set + * the name length to zero for simplicity). + */ + if (!NVolCaseSensitive(vol) && + ie->key.file_name.file_name_type && + ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, + IGNORE_CASE, vol->upcase, vol->upcase_len)) { + int name_size = sizeof(ntfs_name); + u8 type = ie->key.file_name.file_name_type; + u8 len = ie->key.file_name.file_name_length; + + /* Only one case insensitive matching name allowed. */ + if (name) { + ntfs_error(sb, "Found already allocated name " + "in phase 2. 
Please run chkdsk " + "and if that doesn't find any " + "errors please report you saw " + "this message to " + "linux-ntfs-dev@lists.sf.net."); + ntfs_unmap_page(page); + goto dir_err_out; + } + + if (type != FILE_NAME_DOS) + name_size += len * sizeof(uchar_t); + name = kmalloc(name_size, GFP_NOFS); + if (!name) { + err = -ENOMEM; + goto unm_err_out; } + name->mref = le64_to_cpu(ie->data.dir.indexed_file); + name->type = type; + if (type != FILE_NAME_DOS) { + name->len = len; + memcpy(name->name, ie->key.file_name.file_name, + len * sizeof(uchar_t)); + } else + name->len = 0; + *res = name; } - curpos++; - if (!ntfs_entry_is_used(entry)) + /* + * Not a perfect match, need to do full blown collation so we + * know which way in the B+tree we have to go. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + IGNORE_CASE, vol->upcase, vol->upcase_len); + /* + * If uname collates before the name of the current entry, there + * is definitely no such name in this index but we might need to + * descend into the B+tree so we just break out of the loop. + */ + if (rc == -1) break; - length = NTFS_GETU16(entry + 8); - if (!length) { - ntfs_error("infinite loop\n"); + /* The names are not equal, continue the search. */ + if (rc) + continue; + /* + * Names match with case insensitive comparison, now try the + * case sensitive comparison, which is required for proper + * collation. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); + if (rc == -1) break; + if (rc) + continue; + /* + * Perfect match, this will never happen as the + * ntfs_are_names_equal() call will have gotten a match but we + * still treat it correctly. + */ + goto found_it2; + } + /* + * We have finished with this index buffer without success. Check for + * the presence of a child node. 
+ */ + if (ie->flags & INDEX_ENTRY_NODE) { + if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { + ntfs_error(sb, "Index entry with child node found in " + "a leaf node in directory inode 0x%lx.", + dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - entry += length; + /* Child node present, descend into it. */ + old_vcn = vcn; + vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8); + if (vcn >= 0) { + /* If vcn is in the same page cache page as old_vcn we + * recycle the mapped page. */ + if (old_vcn << vol->cluster_size_bits >> + PAGE_CACHE_SHIFT == vcn << + vol->cluster_size_bits >> + PAGE_CACHE_SHIFT) + goto fast_descend_into_child_node; + ntfs_unmap_page(page); + goto descend_into_child_node; + } + ntfs_error(sb, "Negative child node vcn in directory inode " + "0x%lx.", dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - return -1; + /* + * No child node present, return -ENOENT, unless we have got a matching + * name cached in name in which case return the mft reference + * associated with it. + */ + if (name) { + ntfs_unmap_page(page); + return name->mref; + } + ntfs_debug("Entry not found."); + err = -ENOENT; +unm_err_out: + ntfs_unmap_page(page); +err_out: + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(dir_ni); + if (name) { + kfree(name); + *res = NULL; + } + return ERR_MREF(err); +dir_err_out: + ntfs_error(sb, "Corrupt directory. Aborting lookup."); + err = -EIO; + goto err_out; } - -/* Iterate over a list of entries, either from an index block, or from the - * index root. - * If searching BY_POSITION, pop the top index from the position. If the - * position stack is empty then, return the item at the index and set the - * position to the next entry. If the position stack is not empty, - * recursively proceed for subnodes. If the entry at the position is the - * 'end of dir' entry, return 'not found' and the empty stack. 
- * If searching BY_NAME, walk through the items until found or until - * one item is collated after the requested item. In the former case, return - * the result. In the latter case, recursively proceed to the subnodes. - * If 'end of dir' is reached, the name is not in the directory */ -static int ntfs_getdir_iterate(ntfs_iterate_s *walk, char *start, char *entry) -{ - int length; - int cmp; - if (walk->type == BY_POSITION) - return ntfs_getdir_iterate_byposition(walk, start, entry); - do { - /* If the current entry is a real one, compare with the - * requested item. If the current entry is the last item, it - * is always larger than the requested item. */ - cmp = ntfs_entry_is_used(entry) ? - ntfs_my_strcmp(walk,entry) : -1; - switch (walk->type) { - case BY_NAME: - switch (cmp) { - case -1: - return ntfs_entry_has_subnodes(entry) ? - ntfs_descend(walk, start, entry) : 0; - case 0: - return ntfs_copyresult(walk->result, entry); - case 1: - break; - } +#if 0 + +// TODO: (AIA) +// The algorithm embedded in this code will be required for the time when we +// want to support adding of entries to directories, where we require correct +// collation of file names in order not to cause corruption of the file system. + +/** + * ntfs_lookup_inode_by_name - find an inode in a directory given its name + * @dir_ni: ntfs inode of the directory in which to search for the name + * @uname: Unicode name for which to search in the directory + * @uname_len: length of the name @uname in Unicode characters + * + * Look for an inode with name @uname in the directory with inode @dir_ni. + * ntfs_lookup_inode_by_name() walks the contents of the directory looking for + * the Unicode name. If the name is found in the directory, the corresponding + * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it + * is a 64-bit number containing the sequence number. + * + * On error, a negative value is returned corresponding to the error code. 
In + * particular if the inode is not found -ENOENT is returned. Note that you + * can't just check the return value for being negative, you have to check the + * inode number for being negative which you can extract using MREC(return + * value). + * + * Note, @uname_len does not include the (optional) terminating NULL character. + */ +u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname, + const int uname_len) +{ + ntfs_volume *vol = dir_ni->vol; + struct super_block *sb = vol->sb; + MFT_RECORD *m; + INDEX_ROOT *ir; + INDEX_ENTRY *ie; + INDEX_ALLOCATION *ia; + u8 *index_end; + u64 mref; + attr_search_context *ctx; + int err, rc; + IGNORE_CASE_BOOL ic; + VCN vcn, old_vcn; + struct address_space *ia_mapping; + struct page *page; + u8 *kaddr; + + /* Get hold of the mft record for the directory. */ + m = map_mft_record(dir_ni); + if (IS_ERR(m)) { + ntfs_error(sb, "map_mft_record() failed with error code %ld.", + -PTR_ERR(m)); + return ERR_MREF(PTR_ERR(m)); + } + ctx = get_attr_search_ctx(dir_ni, m); + if (!ctx) { + err = -ENOMEM; + goto err_out; + } + /* Find the index root attribute in the mft record. */ + if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0, + ctx)) { + ntfs_error(sb, "Index root attribute missing in directory " + "inode 0x%lx.", dir_ni->mft_no); + err = -EIO; + goto err_out; + } + /* Get to the index root value (it's been verified in read_inode). */ + ir = (INDEX_ROOT*)((u8*)ctx->attr + + le16_to_cpu(ctx->attr->data.resident.value_offset)); + index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); + /* The first index entry. */ + ie = (INDEX_ENTRY*)((u8*)&ir->index + + le32_to_cpu(ir->index.entries_offset)); + /* + * Loop until we exceed valid memory (corruption case) or until we + * reach the last entry. + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { + /* Bounds checks. 
*/ + if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->key_length) > + index_end) + goto dir_err_out; + /* + * The last entry cannot contain a name. It can however contain + * a pointer to a child node in the B+tree so we just break out. + */ + if (ie->flags & INDEX_ENTRY_END) break; - case DIR_INSERT: - switch (cmp) { - case -1: - return ntfs_entry_has_subnodes(entry) ? - ntfs_descend(walk, start, entry) : - ntfs_dir_insert(walk, start, entry); - case 0: - return -EEXIST; - case 1: - break; - } + /* + * If the current entry has a name type of POSIX, the name is + * case sensitive and not otherwise. This has the effect of us + * not being able to access any POSIX file names which collate + * after the non-POSIX one when they only differ in case, but + * anyone doing screwy stuff like that deserves to burn in + * hell... Doing that kind of stuff on NT4 actually causes + * corruption on the partition even when using SP6a and Linux + * is not involved at all. + */ + ic = ie->key.file_name.file_name_type ? IGNORE_CASE : + CASE_SENSITIVE; + /* + * If the names match perfectly, we are done and return the + * mft reference of the inode (i.e. the inode number together + * with the sequence number for consistency checking. We + * convert it to cpu format before returning. + */ + if (ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, ic, + vol->upcase, vol->upcase_len)) { +found_it: + mref = le64_to_cpu(ie->data.dir.indexed_file); + put_attr_search_ctx(ctx); + unmap_mft_record(dir_ni); + return mref; + } + /* + * Not a perfect match, need to do full blown collation so we + * know which way in the B+tree we have to go. 
+ */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + IGNORE_CASE, vol->upcase, vol->upcase_len); + /* + * If uname collates before the name of the current entry, there + * is definitely no such name in this index but we might need to + * descend into the B+tree so we just break out of the loop. + */ + if (rc == -1) + break; + /* The names are not equal, continue the search. */ + if (rc) + continue; + /* + * Names match with case insensitive comparison, now try the + * case sensitive comparison, which is required for proper + * collation. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); + if (rc == -1) break; - default: - ntfs_error("TODO\n"); /* FIXME: ? */ + if (rc) + continue; + /* + * Perfect match, this will never happen as the + * ntfs_are_names_equal() call will have gotten a match but we + * still treat it correctly. + */ + goto found_it; + } + /* + * We have finished with this index without success. Check for the + * presence of a child node. + */ + if (!(ie->flags & INDEX_ENTRY_NODE)) { + /* No child node, return -ENOENT. */ + err = -ENOENT; + goto err_out; + } /* Child node present, descend into it. */ + /* Consistency check: Verify that an index allocation exists. */ + if (!NInoIndexAllocPresent(dir_ni)) { + ntfs_error(sb, "No index allocation attribute but index entry " + "requires one. Directory inode 0x%lx is " + "corrupt or driver bug.", dir_ni->mft_no); + err = -EIO; + goto err_out; + } + /* Get the starting vcn of the index_block holding the child node. */ + vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8); + ia_mapping = VFS_I(dir_ni)->i_mapping; + /* + * We are done with the index root and the mft record. Release them, + * otherwise we deadlock with ntfs_map_page(). 
+ */ + put_attr_search_ctx(ctx); + unmap_mft_record(dir_ni); + m = NULL; + ctx = NULL; +descend_into_child_node: + /* + * Convert vcn to index into the index allocation attribute in units + * of PAGE_CACHE_SIZE and map the page cache page, reading it from + * disk if necessary. + */ + page = ntfs_map_page(ia_mapping, vcn << + dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT); + if (IS_ERR(page)) { + ntfs_error(sb, "Failed to map directory index page, error %ld.", + -PTR_ERR(page)); + err = PTR_ERR(page); + goto err_out; + } + kaddr = (u8*)page_address(page); +fast_descend_into_child_node: + /* Get to the index allocation block. */ + ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << + dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK)); + /* Bounds checks. */ + if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { + ntfs_error(sb, "Out of bounds check failed. Corrupt directory " + "inode 0x%lx or driver bug.", dir_ni->mft_no); + err = -EIO; + goto unm_err_out; + } + if (sle64_to_cpu(ia->index_block_vcn) != vcn) { + ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is " + "different from expected VCN (0x%Lx). " + "Directory inode 0x%lx is corrupt or driver " + "bug.", + (long long)sle64_to_cpu(ia->index_block_vcn), + (long long)vcn, dir_ni->mft_no); + err = -EIO; + goto unm_err_out; + } + if (le32_to_cpu(ia->index.allocated_size) + 0x18 != + dir_ni->itype.index.block_size) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%lx has a size (%u) differing from the " + "directory specified size (%u). Directory " + "inode is corrupt or driver bug.", + (long long)vcn, dir_ni->mft_no, + le32_to_cpu(ia->index.allocated_size) + 0x18, + dir_ni->itype.index.block_size); + err = -EIO; + goto unm_err_out; + } + index_end = (u8*)ia + dir_ni->itype.index.block_size; + if (index_end > kaddr + PAGE_CACHE_SIZE) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%lx crosses page boundary. Impossible! " + "Cannot access! 
This is probably a bug in the " + "driver.", (long long)vcn, dir_ni->mft_no); + err = -EIO; + goto unm_err_out; + } + index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); + if (index_end > (u8*)ia + dir_ni->itype.index.block_size) { + ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory " + "inode 0x%lx exceeds maximum size.", + (long long)vcn, dir_ni->mft_no); + err = -EIO; + goto unm_err_out; + } + /* The first index entry. */ + ie = (INDEX_ENTRY*)((u8*)&ia->index + + le32_to_cpu(ia->index.entries_offset)); + /* + * Iterate similar to above big loop but applied to index buffer, thus + * loop until we exceed valid memory (corruption case) or until we + * reach the last entry. + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { + /* Bounds check. */ + if ((u8*)ie < (u8*)ia || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->key_length) > + index_end) { + ntfs_error(sb, "Index entry out of bounds in " + "directory inode 0x%lx.", + dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - if (!ntfs_entry_is_used(entry)) + /* + * The last entry cannot contain a name. It can however contain + * a pointer to a child node in the B+tree so we just break out. + */ + if (ie->flags & INDEX_ENTRY_END) break; - length = NTFS_GETU16(entry + 8); - if (!length) { - ntfs_error("infinite loop\n"); + /* + * If the current entry has a name type of POSIX, the name is + * case sensitive and not otherwise. This has the effect of us + * not being able to access any POSIX file names which collate + * after the non-POSIX one when they only differ in case, but + * anyone doing screwy stuff like that deserves to burn in + * hell... Doing that kind of stuff on NT4 actually causes + * corruption on the partition even when using SP6a and Linux + * is not involved at all. + */ + ic = ie->key.file_name.file_name_type ? 
IGNORE_CASE : + CASE_SENSITIVE; + /* + * If the names match perfectly, we are done and return the + * mft reference of the inode (i.e. the inode number together + * with the sequence number for consistency checking. We + * convert it to cpu format before returning. + */ + if (ntfs_are_names_equal(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, ic, + vol->upcase, vol->upcase_len)) { +found_it2: + mref = le64_to_cpu(ie->data.dir.indexed_file); + ntfs_unmap_page(page); + return mref; + } + /* + * Not a perfect match, need to do full blown collation so we + * know which way in the B+tree we have to go. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + IGNORE_CASE, vol->upcase, vol->upcase_len); + /* + * If uname collates before the name of the current entry, there + * is definitely no such name in this index but we might need to + * descend into the B+tree so we just break out of the loop. + */ + if (rc == -1) + break; + /* The names are not equal, continue the search. */ + if (rc) + continue; + /* + * Names match with case insensitive comparison, now try the + * case sensitive comparison, which is required for proper + * collation. + */ + rc = ntfs_collate_names(uname, uname_len, + (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, 1, + CASE_SENSITIVE, vol->upcase, vol->upcase_len); + if (rc == -1) break; + if (rc) + continue; + /* + * Perfect match, this will never happen as the + * ntfs_are_names_equal() call will have gotten a match but we + * still treat it correctly. + */ + goto found_it2; + } + /* + * We have finished with this index buffer without success. Check for + * the presence of a child node. 
+ */ + if (ie->flags & INDEX_ENTRY_NODE) { + if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { + ntfs_error(sb, "Index entry with child node found in " + "a leaf node in directory inode 0x%lx.", + dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - entry += length; - } while (1); - return 0; -} - -/* Tree walking is done using position numbers. The following numbers have a - * special meaning: - * 0 start (.) - * -1 no more entries - * -2 .. - * All other numbers encode sequences of indices. The sequence a, b, c is - * encoded as , where is the encoding of foo. The - * first few integers are encoded as follows: - * 0: 0000 1: 0010 2: 0100 3: 0110 - * 4: 1000 5: 1010 6: 1100 stop: 1110 - * 7: 000001 8: 000101 9: 001001 10: 001101 - * The least significant bits give the width of this encoding, the other bits - * encode the value, starting from the first value of the interval. - * tag width first value last value - * 0 3 0 6 - * 01 4 7 22 - * 011 5 23 54 - * 0111 6 55 119 - * More values are hopefully not needed, as the file position has currently - * 64 bits in total. */ - -/* Find an entry in the directory. Return 0 if not found, otherwise copy the - * entry to the result buffer. */ -int ntfs_getdir(ntfs_iterate_s *walk) -{ - int length = walk->dir->vol->mft_record_size; - int retval, error; - /* Start at the index root. */ - char *root = ntfs_malloc(length); - ntfs_io io; - - if (!root) - return -ENOMEM; - io.fn_put = ntfs_put; - io.param = root; - io.size = length; - error = ntfs_read_attr(walk->dir, walk->dir->vol->at_index_root, I30, - 0, &io); - if (error) { - ntfs_error("Not a directory\n"); - return 0; + /* Child node present, descend into it. */ + old_vcn = vcn; + vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8); + if (vcn >= 0) { + /* If vcn is in the same page cache page as old_vcn we + * recycle the mapped page. 
*/ + if (old_vcn << vol->cluster_size_bits >> + PAGE_CACHE_SHIFT == vcn << + vol->cluster_size_bits >> + PAGE_CACHE_SHIFT) + goto fast_descend_into_child_node; + ntfs_unmap_page(page); + goto descend_into_child_node; + } + ntfs_error(sb, "Negative child node vcn in directory inode " + "0x%lx.", dir_ni->mft_no); + err = -EIO; + goto unm_err_out; } - walk->block = -1; - /* FIXME: Move these to walk. */ - walk->dir->u.index.recordsize = NTFS_GETU32(root + 0x8); - walk->dir->u.index.clusters_per_record = NTFS_GETU32(root + 0xC); - /* FIXME: Consistency check. */ - /* Skip header. */ - retval = ntfs_getdir_iterate(walk, root, root + 0x20); - ntfs_free(root); - return retval; + /* No child node, return -ENOENT. */ + ntfs_debug("Entry not found."); + err = -ENOENT; +unm_err_out: + ntfs_unmap_page(page); +err_out: + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(dir_ni); + return ERR_MREF(err); +dir_err_out: + ntfs_error(sb, "Corrupt directory. Aborting lookup."); + err = -EIO; + goto err_out; } -/* Find an entry in the directory by its position stack. Iteration starts - * if the stack is 0, in which case the position is set to the first item - * in the directory. If the position is nonzero, return the item at the - * position and change the position to the next item. The position is -1 - * if there are no more items. */ -int ntfs_getdir_byposition(ntfs_iterate_s *walk) -{ - walk->type = BY_POSITION; - return ntfs_getdir(walk); -} +#endif -/* Find an entry in the directory by its name. Return 0 if not found. 
*/ -int ntfs_getdir_byname(ntfs_iterate_s *walk) -{ - walk->type = BY_NAME; - return ntfs_getdir(walk); +typedef union { + INDEX_ROOT *ir; + INDEX_ALLOCATION *ia; +} index_union __attribute__ ((__transparent_union__)); + +typedef enum { + INDEX_TYPE_ROOT, /* index root */ + INDEX_TYPE_ALLOCATION, /* index allocation */ +} INDEX_TYPE; + +/** + * ntfs_filldir - ntfs specific filldir method + * @vol: current ntfs volume + * @fpos: position in the directory (set to the position of @ie) + * @ndir: ntfs inode of current directory + * @index_type: specifies whether @iu is an index root or an index allocation + * @iu: index root or index allocation attribute to which @ie belongs + * @ie: current index entry + * @name: buffer to use for the converted name + * @dirent: vfs filldir callback context + * @filldir: vfs filldir callback + * + * Convert the Unicode file name of @ie to the loaded NLS, store the result in + * @name and pass it to the @filldir callback. + */ +static inline int ntfs_filldir(ntfs_volume *vol, loff_t *fpos, + ntfs_inode *ndir, const INDEX_TYPE index_type, + index_union iu, INDEX_ENTRY *ie, u8 *name, + void *dirent, filldir_t filldir) +{ + int name_len; + unsigned dt_type; + FILE_NAME_TYPE_FLAGS name_type; + + /* Advance the position even if going to skip the entry. 
*/ + if (index_type == INDEX_TYPE_ALLOCATION) + *fpos = (u8*)ie - (u8*)iu.ia + + (sle64_to_cpu(iu.ia->index_block_vcn) << + ndir->itype.index.vcn_size_bits) + + vol->mft_record_size; + else /* if (index_type == INDEX_TYPE_ROOT) */ + *fpos = (u8*)ie - (u8*)iu.ir; + name_type = ie->key.file_name.file_name_type; + if (name_type == FILE_NAME_DOS) { + ntfs_debug("Skipping DOS name space entry."); + return 0; + } + if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) { + ntfs_debug("Skipping root directory self reference entry."); + return 0; + } + if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user && + !NVolShowSystemFiles(vol)) { + ntfs_debug("Skipping system file."); + return 0; + } + name_len = ntfs_ucstonls(vol, (uchar_t*)&ie->key.file_name.file_name, + ie->key.file_name.file_name_length, &name, + NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); + if (name_len <= 0) { + ntfs_debug("Skipping unrepresentable file."); + return 0; + } + if (ie->key.file_name.file_attributes & + FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT) + dt_type = DT_DIR; + else + dt_type = DT_REG; + ntfs_debug("Calling filldir for %s with len %i, fpos 0x%Lx, inode " + "0x%lx, DT_%s.", name, name_len, *fpos, + MREF_LE(ie->data.dir.indexed_file), + dt_type == DT_DIR ? "DIR" : "REG"); + return filldir(dirent, name, name_len, *fpos, + MREF_LE(ie->data.dir.indexed_file), dt_type); } -int ntfs_getdir_unsorted(ntfs_inode *ino, u32 *p_high, u32 *p_low, - int (*cb)(ntfs_u8 *, void *), void *param) +/* + * VFS calls readdir with BKL (and i_sem) held so no possible RACE conditions. + * We use the same basic approach as the old NTFS driver, i.e. we parse the + * index root entries and then the index allocation entries that are marked + * as in use in the index bitmap. + * While this will return the names in random order this doesn't matter for + * readdir but OTOH results in a faster readdir. 
+ */ +static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - s64 ib_ofs; - char *buf = 0, *entry = 0; - ntfs_attribute *attr; - ntfs_volume *vol; - int byte, bit, err = 0; - u32 start, finish, ibs, max_size; - ntfs_io io; - u8 ibs_bits; - - if (!ino) { - ntfs_error("%s(): No inode! Returning -EINVAL.\n",__FUNCTION__); - return -EINVAL; - } - vol = ino->vol; - if (!vol) { - ntfs_error("%s(): Inode 0x%lx has no volume. Returning " - "-EINVAL.\n", __FUNCTION__, ino->i_number); - return -EINVAL; - } - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 1: Entering for inode 0x%lx, " - "p_high = 0x%x, p_low = 0x%x.\n", __FUNCTION__, - ino->i_number, *p_high, *p_low); - if (!*p_high) { - /* We are still in the index root. */ - buf = ntfs_malloc(io.size = vol->mft_record_size); - if (!buf) - return -ENOMEM; - io.fn_put = ntfs_put; - io.param = buf; - err = ntfs_read_attr(ino, vol->at_index_root, I30, 0, &io); - if (err || !io.size) - goto read_err_ret; - ino->u.index.recordsize = ibs = NTFS_GETU32(buf + 0x8); - ino->u.index.clusters_per_record = NTFS_GETU32(buf + 0xC); - entry = buf + 0x20; - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 2: In index root.\n", - __FUNCTION__); - ibs_bits = ffs(ibs) - 1; - /* Compensate for faked "." and "..". */ - start = 2; - } else { /* We are in an index record. */ - io.size = ibs = ino->u.index.recordsize; - buf = ntfs_malloc(ibs); - if (!buf) - return -ENOMEM; - ibs_bits = ffs(ibs) - 1; - io.fn_put = ntfs_put; - io.param = buf; - /* - * 0 is index root, index allocation starts at 1 and works in - * units of index block size (ibs). - */ - ib_ofs = (s64)(*p_high - 1) << ibs_bits; - err = ntfs_read_attr(ino, vol->at_index_allocation, I30, ib_ofs, - &io); - if (err || io.size != ibs) - goto read_err_ret; - if (!ntfs_check_index_record(ino, buf)) { - ntfs_error("%s(): Index block 0x%x is not an index " - "record. 
Returning -ENOTDIR.\n", - __FUNCTION__, *p_high - 1); - ntfs_free(buf); - return -ENOTDIR; - } - entry = buf + 0x18 + NTFS_GETU16(buf + 0x18); - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 3: In index " - "allocation.\n", __FUNCTION__); - start = 0; - } - /* Process the entries. */ - finish = *p_low; - for (; entry < (buf + ibs) && ntfs_entry_is_used(entry); - entry += NTFS_GETU16(entry + 8)) { - if (start < finish) { - /* Skip entries that were already processed. */ - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 4: Skipping " - "already processed entry p_high 0x%x, " - "p_low 0x%x.\n", __FUNCTION__, *p_high, - start); - start++; + s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; + loff_t fpos; + struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode; + struct super_block *sb = vdir->i_sb; + ntfs_inode *ndir = NTFS_I(vdir); + ntfs_volume *vol = NTFS_SB(sb); + MFT_RECORD *m; + INDEX_ROOT *ir; + INDEX_ENTRY *ie; + INDEX_ALLOCATION *ia; + u8 *name = NULL; + int rc, err, ir_pos, cur_bmp_pos; + struct address_space *ia_mapping, *bmp_mapping; + struct page *bmp_page = NULL, *ia_page = NULL; + u8 *kaddr, *bmp, *index_end; + attr_search_context *ctx; + + fpos = filp->f_pos; + ntfs_debug("Entering for inode 0x%lx, fpos 0x%Lx.", + vdir->i_ino, fpos); + rc = err = 0; + /* Are we at end of dir yet? */ + if (fpos >= vdir->i_size + vol->mft_record_size) + goto done; + /* Emulate . and .. for all directories. */ + if (!fpos) { + ntfs_debug("Calling filldir for . with len 1, fpos 0x0, " + "inode 0x%lx, DT_DIR.", vdir->i_ino); + rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR); + if (rc) + goto done; + fpos++; + } + if (fpos == 1) { + ntfs_debug("Calling filldir for .. 
with len 2, fpos 0x1, " + "inode 0x%lx, DT_DIR.", + filp->f_dentry->d_parent->d_inode->i_ino); + rc = filldir(dirent, "..", 2, fpos, + filp->f_dentry->d_parent->d_inode->i_ino, + DT_DIR); + if (rc) + goto done; + fpos++; + } + m = NULL; + ctx = NULL; + /* + * Allocate a buffer to store the current name being processed + * converted to format determined by current NLS. + */ + name = (u8*)kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, + GFP_NOFS); + if (unlikely(!name)) { + err = -ENOMEM; + goto err_out; + } + /* Are we jumping straight into the index allocation attribute? */ + if (fpos >= vol->mft_record_size) + goto skip_index_root; + /* Get hold of the mft record for the directory. */ + m = map_mft_record(ndir); + if (unlikely(IS_ERR(m))) { + err = PTR_ERR(m); + m = NULL; + goto err_out; + } + ctx = get_attr_search_ctx(ndir, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + /* Get the offset into the index root attribute. */ + ir_pos = (s64)fpos; + /* Find the index root attribute in the mft record. */ + if (unlikely(!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, + NULL, 0, ctx))) { + ntfs_error(sb, "Index root attribute missing in directory " + "inode 0x%lx.", vdir->i_ino); + goto err_out; + } + /* Get to the index root value (it's been verified in read_inode). */ + ir = (INDEX_ROOT*)((u8*)ctx->attr + + le16_to_cpu(ctx->attr->data.resident.value_offset)); + index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); + /* The first index entry. */ + ie = (INDEX_ENTRY*)((u8*)&ir->index + + le32_to_cpu(ir->index.entries_offset)); + /* + * Loop until we exceed valid memory (corruption case) or until we + * reach the last entry or until filldir tells us it has had enough + * or signals an error (both covered by the rc test). + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { + ntfs_debug("In index root, offset 0x%x.", (u8*)ie - (u8*)ir); + /* Bounds checks. 
*/ + if (unlikely((u8*)ie < (u8*)ctx->mrec || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->key_length) > + index_end)) + goto err_out; + /* The last entry cannot contain a name. */ + if (ie->flags & INDEX_ENTRY_END) + break; + /* Skip index root entry if continuing previous readdir. */ + if (ir_pos > (u8*)ie - (u8*)ir) continue; + /* Submit the name to the filldir callback. */ + rc = ntfs_filldir(vol, &fpos, ndir, INDEX_TYPE_ROOT, ir, ie, + name, dirent, filldir); + if (rc) { + put_attr_search_ctx(ctx); + unmap_mft_record(ndir); + goto abort; } - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 5: Processing entry " - "p_high 0x%x, p_low 0x%x.\n", __FUNCTION__, - *p_high, *p_low); - if ((err = cb(entry, param))) { - /* filldir signalled us to stop. */ - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 6: cb returned " - "%i, returning 0, p_high 0x%x, " - "p_low 0x%x.\n", __FUNCTION__, err, - *p_high, *p_low); - ntfs_free(buf); - return 0; - } - ++*p_low; } - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 7: After processing entries, " - "p_high 0x%x, p_low 0x%x.\n", __FUNCTION__, *p_high, - *p_low); - /* We have to locate the next record. */ - ntfs_free(buf); - buf = 0; - *p_low = 0; - attr = ntfs_find_attr(ino, vol->at_bitmap, I30); - if (!attr) { - /* Directory does not have index bitmap and index allocation. */ - *p_high = 0x7fff; - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 8: No index allocation. " - "Returning 0, p_high 0x7fff, p_low 0x0.\n", - __FUNCTION__); - return 0; + /* + * We are done with the index root and the mft record for that matter. + * We need to release it, otherwise we deadlock on ntfs_attr_iget() + * and/or ntfs_read_page(). + */ + put_attr_search_ctx(ctx); + unmap_mft_record(ndir); + m = NULL; + ctx = NULL; + /* If there is no index allocation attribute we are finished. */ + if (!NInoIndexAllocPresent(ndir)) + goto EOD; + /* Advance fpos to the beginning of the index allocation. 
*/ + fpos = vol->mft_record_size; +skip_index_root: + kaddr = NULL; + prev_ia_pos = -1LL; + /* Get the offset into the index allocation attribute. */ + ia_pos = (s64)fpos - vol->mft_record_size; + ia_mapping = vdir->i_mapping; + bmp_vi = ndir->itype.index.bmp_ino; + if (unlikely(!bmp_vi)) { + ntfs_debug("Inode %lu, regetting index bitmap.", vdir->i_ino); + bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); + if (unlikely(IS_ERR(bmp_vi))) { + ntfs_error(sb, "Failed to get bitmap attribute."); + err = PTR_ERR(bmp_vi); + goto err_out; + } + ndir->itype.index.bmp_ino = bmp_vi; } - max_size = attr->size; - if (max_size > 0x7fff >> 3) { - ntfs_error("%s(): Directory too large. Visible " - "length is truncated.\n", __FUNCTION__); - max_size = 0x7fff >> 3; - } - buf = ntfs_malloc(max_size); - if (!buf) - return -ENOMEM; - io.param = buf; - io.size = max_size; - err = ntfs_read_attr(ino, vol->at_bitmap, I30, 0, &io); - if (err || io.size != max_size) - goto read_err_ret; - attr = ntfs_find_attr(ino, vol->at_index_allocation, I30); - if (!attr) { - ntfs_free(buf); - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 9: Find attr failed. " - "Returning -EIO.\n", __FUNCTION__); - return -EIO; - } - if (attr->resident) { - ntfs_free(buf); - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 9.5: IA is resident. Not" - " allowed. Returning EINVAL.\n", __FUNCTION__); - return -EINVAL; - } - /* Loop while going through non-allocated index records. */ - max_size <<= 3; - while (1) { - if (++*p_high >= 0x7fff) { - ntfs_error("%s(): Unsorted 10: Directory " - "inode 0x%lx overflowed the maximum " - "number of index allocation buffers " - "the driver can cope with. Pretending " - "to be at end of directory.\n", - __FUNCTION__, ino->i_number); - goto fake_eod; + bmp_mapping = bmp_vi->i_mapping; + /* Get the starting bitmap bit position and sanity check it. 
*/ + bmp_pos = ia_pos >> ndir->itype.index.block_size_bits; + if (unlikely(bmp_pos >> 3 >= bmp_vi->i_size)) { + ntfs_error(sb, "Current index allocation position exceeds " + "index bitmap size."); + goto err_out; + } + /* Get the starting bit position in the current bitmap page. */ + cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1); + bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1); +get_next_bmp_page: + ntfs_debug("Reading bitmap with page index 0x%Lx, bit ofs 0x%Lx", + (long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT), + (long long)bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1)); + bmp_page = ntfs_map_page(bmp_mapping, + bmp_pos >> (3 + PAGE_CACHE_SHIFT)); + if (unlikely(IS_ERR(bmp_page))) { + ntfs_error(sb, "Reading index bitmap failed."); + err = PTR_ERR(bmp_page); + bmp_page = NULL; + goto err_out; + } + bmp = (u8*)page_address(bmp_page); + /* Find next index block in use. */ + while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) { +find_next_index_buffer: + cur_bmp_pos++; + /* + * If we have reached the end of the bitmap page, get the next + * page, and put away the old one. + */ + if (unlikely((cur_bmp_pos >> 3) >= PAGE_CACHE_SIZE)) { + ntfs_unmap_page(bmp_page); + bmp_pos += PAGE_CACHE_SIZE * 8; + cur_bmp_pos = 0; + goto get_next_bmp_page; } - if (*p_high > max_size || (s64)*p_high << ibs_bits > - attr->initialized) { -fake_eod: - /* No more index records. */ - *p_high = 0x7fff; - *p_low = 0; - ntfs_free(buf); - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 10.5: No more " - "index records. Returning 0, p_high " - "0x7fff, p_low 0.\n", __FUNCTION__); - return 0; + /* If we have reached the end of the bitmap, we are done. */ + if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= vdir->i_size)) + goto unm_EOD; + ia_pos = (bmp_pos + cur_bmp_pos) << + ndir->itype.index.block_size_bits; + } + ntfs_debug("Handling index buffer 0x%Lx.", + (long long)bmp_pos + cur_bmp_pos); + /* If the current index buffer is in the same page we reuse the page. 
*/ + if ((prev_ia_pos & PAGE_CACHE_MASK) != (ia_pos & PAGE_CACHE_MASK)) { + prev_ia_pos = ia_pos; + if (likely(ia_page != NULL)) + ntfs_unmap_page(ia_page); + /* + * Map the page cache page containing the current ia_pos, + * reading it from disk if necessary. + */ + ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT); + if (unlikely(IS_ERR(ia_page))) { + ntfs_error(sb, "Reading index allocation data failed."); + err = PTR_ERR(ia_page); + ia_page = NULL; + goto err_out; } - byte = (ntfs_cluster_t)(*p_high - 1); - bit = 1 << (byte & 7); - byte >>= 3; - if ((buf[byte] & bit)) + kaddr = (u8*)page_address(ia_page); + } + /* Get the current index buffer. */ + ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK & + ~(s64)(ndir->itype.index.block_size - 1))); + /* Bounds checks. */ + if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE)) { + ntfs_error(sb, "Out of bounds check failed. Corrupt directory " + "inode 0x%lx or driver bug.", vdir->i_ino); + goto err_out; + } + if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos & + ~(s64)(ndir->itype.index.block_size - 1)) >> + ndir->itype.index.vcn_size_bits)) { + ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is " + "different from expected VCN (0x%Lx). " + "Directory inode 0x%lx is corrupt or driver " + "bug. ", + (long long)sle64_to_cpu(ia->index_block_vcn), + (long long)ia_pos >> + ndir->itype.index.vcn_size_bits, vdir->i_ino); + goto err_out; + } + if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 != + ndir->itype.index.block_size)) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%lx has a size (%u) differing from the " + "directory specified size (%u). 
Directory " + "inode is corrupt or driver bug.", + (long long)ia_pos >> + ndir->itype.index.vcn_size_bits, vdir->i_ino, + le32_to_cpu(ia->index.allocated_size) + 0x18, + ndir->itype.index.block_size); + goto err_out; + } + index_end = (u8*)ia + ndir->itype.index.block_size; + if (unlikely(index_end > kaddr + PAGE_CACHE_SIZE)) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%lx crosses page boundary. Impossible! " + "Cannot access! This is probably a bug in the " + "driver.", (long long)ia_pos >> + ndir->itype.index.vcn_size_bits, vdir->i_ino); + goto err_out; + } + ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1); + index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); + if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) { + ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory " + "inode 0x%lx exceeds maximum size.", + (long long)ia_pos >> + ndir->itype.index.vcn_size_bits, vdir->i_ino); + goto err_out; + } + /* The first index entry in this index buffer. */ + ie = (INDEX_ENTRY*)((u8*)&ia->index + + le32_to_cpu(ia->index.entries_offset)); + /* + * Loop until we exceed valid memory (corruption case) or until we + * reach the last entry or until filldir tells us it has had enough + * or signals an error (both covered by the rc test). + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { + ntfs_debug("In index allocation, offset 0x%Lx.", + (long long)ia_start + ((u8*)ie - (u8*)ia)); + /* Bounds checks. */ + if (unlikely((u8*)ie < (u8*)ia || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->key_length) > + index_end)) + goto err_out; + /* The last entry cannot contain a name. */ + if (ie->flags & INDEX_ENTRY_END) break; - }; - ntfs_debug(DEBUG_DIR3, "%s(): Unsorted 11: Done. Returning 0, p_high " - "0x%x, p_low 0x%x.\n", __FUNCTION__, *p_high, *p_low); - ntfs_free(buf); + /* Skip index block entry if continuing previous readdir. 
*/ + if (ia_pos - ia_start > (u8*)ie - (u8*)ia) + continue; + /* Submit the name to the filldir callback. */ + rc = ntfs_filldir(vol, &fpos, ndir, INDEX_TYPE_ALLOCATION, ia, + ie, name, dirent, filldir); + if (rc) { + ntfs_unmap_page(ia_page); + ntfs_unmap_page(bmp_page); + goto abort; + } + } + goto find_next_index_buffer; +unm_EOD: + if (ia_page) + ntfs_unmap_page(ia_page); + ntfs_unmap_page(bmp_page); +EOD: + /* We are finished, set fpos to EOD. */ + fpos = vdir->i_size + vol->mft_record_size; +abort: + kfree(name); +done: +#ifdef DEBUG + if (!rc) + ntfs_debug("EOD, fpos 0x%Lx, returning 0.", fpos); + else + ntfs_debug("filldir returned %i, fpos 0x%Lx, returning 0.", + rc, fpos); +#endif + filp->f_pos = fpos; return 0; -read_err_ret: +err_out: + if (bmp_page) + ntfs_unmap_page(bmp_page); + if (ia_page) + ntfs_unmap_page(ia_page); + if (name) + kfree(name); + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(ndir); if (!err) err = -EIO; - ntfs_error("%s(): Read failed. Returning error code %i.\n", - __FUNCTION__, err); - ntfs_free(buf); + ntfs_debug("Failed. Returning error code %i.", -err); + filp->f_pos = fpos; return err; } -int ntfs_dir_add(ntfs_inode *dir, ntfs_inode *new, ntfs_attribute *name) -{ - ntfs_iterate_s walk; - int nsize, esize; - ntfs_u8* entry, *ndata; - int error; - - walk.type = DIR_INSERT; - walk.dir = dir; - walk.u.flags = 0; - nsize = name->size; - ndata = name->d.data; - walk.name = (ntfs_u16*)(ndata + 0x42); - walk.namelen = NTFS_GETU8(ndata + 0x40); - walk.new_entry_size = esize = (nsize + 0x10 + 7) & ~7; - walk.new_entry = entry = ntfs_malloc(esize); - if (!entry) - return -ENOMEM; - NTFS_PUTINUM(entry, new); - NTFS_PUTU16(entry + 0x8, esize); /* Size of entry. */ - NTFS_PUTU16(entry + 0xA, nsize); /* Size of original name attribute. */ - NTFS_PUTU16(entry + 0xC, 0); /* Flags. */ - NTFS_PUTU16(entry + 0xE, 0); /* Reserved. 
*/ - ntfs_memcpy(entry + 0x10, ndata, nsize); - ntfs_bzero(entry + 0x10 + nsize, esize - 0x10 - nsize); - error = ntfs_getdir(&walk); - if (walk.new_entry) - ntfs_free(walk.new_entry); - return error; -} - -#if 0 -int ntfs_dir_add1(ntfs_inode *dir, const char* name, int namelen, - ntfs_inode *ino) -{ - ntfs_iterate_s walk; - int error; - int nsize; - char *entry; - ntfs_attribute *name_attr; - error = ntfs_decodeuni(dir->vol, name, namelen, &walk.name, - &walk.namelen); - if (error) - return error; - /* FIXME: Set flags. */ - walk.type = DIR_INSERT; - walk.dir = dir; - /* walk.new = ino; */ - /* Prepare new entry. */ - /* Round up to a multiple of 8. */ - walk.new_entry_size = nsize = ((0x52 + 2 * walk.namelen + 7) / 8) * 8; - walk.new_entry = entry = ntfs_malloc(nsize); - if (!entry) - return -ENOMEM; - ntfs_bzero(entry, nsize); - NTFS_PUTINUM(entry, ino); - NTFS_PUTU16(entry + 8, nsize); - NTFS_PUTU16(entry + 0xA, 0x42 + 2 * namelen); /* FIXME: Size of name - * attribute. */ - NTFS_PUTU32(entry + 0xC, 0); /* FIXME: D-F? */ - name_attr = ntfs_find_attr(ino, vol->at_file_name, 0); - /* FIXME: multiple names */ - if (!name_attr || !name_attr->resident) - return -EIDRM; - /* Directory, file stamps, sizes, filename. */ - ntfs_memcpy(entry + 0x10, name_attr->d.data, 0x42 + 2 * namelen); - error = ntfs_getdir(&walk); - ntfs_free(walk.name); - return error; -} -#endif - -/* Fills out and creates an INDEX_ROOT attribute. */ -int ntfs_add_index_root(ntfs_inode *ino, int type) +/** + * ntfs_dir_open - called when an inode is about to be opened + * @vi: inode to be opened + * @filp: file structure describing the inode + * + * Limit directory size to the page cache limit on architectures where unsigned + * long is 32-bits. This is the most we can do for now without overflowing the + * page cache page index. Doing it this way means we don't run into problems + * because of existing too large directories. 
It would be better to allow the + * user to read the accessible part of the directory but I doubt very much + * anyone is going to hit this check on a 32-bit architecture, so there is no + * point in adding the extra complexity required to support this. + * + * On 64-bit architectures, the check is hopefully optimized away by the + * compiler. + */ +static int ntfs_dir_open(struct inode *vi, struct file *filp) { - ntfs_attribute *da; - ntfs_u8 data[0x30]; /* 0x20 header, 0x10 last entry. */ - char name[10]; - - NTFS_PUTU32(data, type); - /* Collation rule. 1 == COLLATION_FILENAME */ - NTFS_PUTU32(data + 4, 1); - NTFS_PUTU32(data + 8, ino->vol->index_record_size); - NTFS_PUTU32(data + 0xC, ino->vol->index_clusters_per_record); - /* Byte offset to first INDEX_ENTRY. */ - NTFS_PUTU32(data + 0x10, 0x10); - /* Size of entries, including header. */ - NTFS_PUTU32(data + 0x14, 0x20); - NTFS_PUTU32(data + 0x18, 0x20); - /* No index allocation, yet. */ - NTFS_PUTU32(data + 0x1C, 0); - /* Add last entry. */ - /* Indexed MFT record. */ - NTFS_PUTU64(data + 0x20, 0); - /* Size of entry. */ - NTFS_PUTU32(data + 0x28, 0x10); - /* Flags: Last entry, no child nodes. */ - NTFS_PUTU32(data + 0x2C, 2); - /* Compute name. */ - ntfs_indexname(name, type); - return ntfs_create_attr(ino, ino->vol->at_index_root, name, - data, sizeof(data), &da); + if (sizeof(unsigned long) < 8) { + if (vi->i_size > MAX_LFS_FILESIZE) + return -EFBIG; + } + return 0; } -int ntfs_mkdir(ntfs_inode *dir, const char *name, int namelen, - ntfs_inode *result) -{ - int error; - - error = ntfs_alloc_inode(dir, result, name, namelen, NTFS_AFLAG_DIR); - if (error) - goto out; - error = ntfs_add_index_root(result, 0x30); - if (error) - goto out; - /* Set directory bit. 
*/ - result->attr[0x16] |= 2; - error = ntfs_update_inode(dir); - if (error) - goto out; - error = ntfs_update_inode(result); - if (error) - goto out; - out: - return error; -} +struct file_operations ntfs_dir_ops = { + .llseek = generic_file_llseek, /* Seek inside directory. */ + .read = generic_read_dir, /* Return -EISDIR. */ + .readdir = ntfs_readdir, /* Read directory contents. */ + .open = ntfs_dir_open, /* Open directory. */ +}; diff -urN linux-2.4.24-vanilla/fs/ntfs/dir.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/dir.h --- linux-2.4.24-vanilla/fs/ntfs/dir.h 2001-07-16 23:14:10.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/dir.h 2004-01-21 14:31:44.000000000 +0000 @@ -1,48 +1,47 @@ /* - * dir.h - Header file for dir.c + * dir.h - Defines for directory handling in NTFS Linux kernel driver. Part of + * the Linux-NTFS project. * - * Copyright (C) 1997 Régis Duchesne + * Copyright (c) 2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define ITERATE_SPLIT_DONE 1 -enum ntfs_iterate_e { - BY_POSITION, - BY_NAME, - DIR_INSERT -}; - -/* not all fields are used for all operations */ -typedef struct ntfs_iterate_s { - enum ntfs_iterate_e type; - ntfs_inode *dir; - union{ - ntfs_u64 pos; - int flags; - }u; - char *result; /* pointer to entry if found */ - ntfs_u16* name; - int namelen; - int block; /* current index record */ - int newblock; /* index record created in a split */ - char *new_entry; - int new_entry_size; - /*ntfs_inode* new;*/ -} ntfs_iterate_s; - -int ntfs_getdir_unsorted(ntfs_inode *ino, ntfs_u32 *p_high, ntfs_u32* p_low, - int (*cb)(ntfs_u8*, void*), void *param); - -int ntfs_getdir_byname(ntfs_iterate_s *walk); - -int ntfs_dir_add(ntfs_inode *dir, ntfs_inode *new, ntfs_attribute *name); - -int ntfs_check_index_record(ntfs_inode *ino, char *record); +#ifndef _LINUX_NTFS_DIR_H +#define _LINUX_NTFS_DIR_H -int ntfs_getdir_byposition(ntfs_iterate_s *walk); +#include "layout.h" + +/* + * ntfs_name is used to return the file name to the caller of + * ntfs_lookup_inode_by_name() in order for the caller (namei.c::ntfs_lookup()) + * to be able to deal with dcache aliasing issues. + */ +typedef struct { + MFT_REF mref; + FILE_NAME_TYPE_FLAGS type; + u8 len; + uchar_t name[0]; +} __attribute__ ((__packed__)) ntfs_name; -int ntfs_mkdir(ntfs_inode* dir,const char* name,int namelen, ntfs_inode *ino); +/* The little endian Unicode string $I30 as a global constant. 
*/ +extern uchar_t I30[5]; -int ntfs_split_indexroot(ntfs_inode *ino); +extern MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, + const uchar_t *uname, const int uname_len, ntfs_name **res); -int ntfs_add_index_root(ntfs_inode *ino, int type); +#endif /* _LINUX_NTFS_DIR_H */ diff -urN linux-2.4.24-vanilla/fs/ntfs/endian.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/endian.h --- linux-2.4.24-vanilla/fs/ntfs/endian.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/endian.h 2004-01-21 14:31:43.000000000 +0000 @@ -0,0 +1,48 @@ +/* + * endian.h - Defines for endianness handling in NTFS Linux kernel driver. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_ENDIAN_H +#define _LINUX_NTFS_ENDIAN_H + +#include <asm/byteorder.h> + +/* + * Signed endianness conversion defines.
+ */ +#define sle16_to_cpu(x) ((s16)__le16_to_cpu((s16)(x))) +#define sle32_to_cpu(x) ((s32)__le32_to_cpu((s32)(x))) +#define sle64_to_cpu(x) ((s64)__le64_to_cpu((s64)(x))) + +#define sle16_to_cpup(x) ((s16)__le16_to_cpu(*(s16*)(x))) +#define sle32_to_cpup(x) ((s32)__le32_to_cpu(*(s32*)(x))) +#define sle64_to_cpup(x) ((s64)__le64_to_cpu(*(s64*)(x))) + +#define cpu_to_sle16(x) ((s16)__cpu_to_le16((s16)(x))) +#define cpu_to_sle32(x) ((s32)__cpu_to_le32((s32)(x))) +#define cpu_to_sle64(x) ((s64)__cpu_to_le64((s64)(x))) + +#define cpu_to_sle16p(x) ((s16)__cpu_to_le16(*(s16*)(x))) +#define cpu_to_sle32p(x) ((s32)__cpu_to_le32(*(s32*)(x))) +#define cpu_to_sle64p(x) ((s64)__cpu_to_le64(*(s64*)(x))) + +#endif /* _LINUX_NTFS_ENDIAN_H */ + diff -urN linux-2.4.24-vanilla/fs/ntfs/file.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/file.c --- linux-2.4.24-vanilla/fs/ntfs/file.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/file.c 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,71 @@ +/* + * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. + * + * Copyright (c) 2001 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "ntfs.h" + +/** + * ntfs_file_open - called when an inode is about to be opened + * @vi: inode to be opened + * @filp: file structure describing the inode + * + * Limit file size to the page cache limit on architectures where unsigned long + * is 32-bits. This is the most we can do for now without overflowing the page + * cache page index. Doing it this way means we don't run into problems because + * of existing too large files. It would be better to allow the user to read + * the beginning of the file but I doubt very much anyone is going to hit this + * check on a 32-bit architecture, so there is no point in adding the extra + * complexity required to support this. + * + * On 64-bit architectures, the check is hopefully optimized away by the + * compiler. + * + * After the check passes, just call generic_file_open() to do its work. + */ +static int ntfs_file_open(struct inode *vi, struct file *filp) +{ + if (sizeof(unsigned long) < 8) { + if (vi->i_size > MAX_LFS_FILESIZE) + return -EFBIG; + } + return generic_file_open(vi, filp); +} + +struct file_operations ntfs_file_ops = { + .llseek = generic_file_llseek, /* Seek inside file. */ + .read = generic_file_read, /* Read from file. */ +#ifdef NTFS_RW + .write = generic_file_write, /* Write to a file. */ +#endif + .mmap = generic_file_mmap, /* Mmap file. */ + .open = ntfs_file_open, /* Open file. 
*/ +}; + +struct inode_operations ntfs_file_inode_ops = { +#ifdef NTFS_RW + .truncate = ntfs_truncate, + .setattr = ntfs_setattr, +#endif +}; + +struct file_operations ntfs_empty_file_ops = {}; + +struct inode_operations ntfs_empty_inode_ops = {}; + diff -urN linux-2.4.24-vanilla/fs/ntfs/fs.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/fs.c --- linux-2.4.24-vanilla/fs/ntfs/fs.c 2003-11-28 18:26:21.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/fs.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,1166 +0,0 @@ -/* - * fs.c - NTFS driver for Linux 2.4.x - * - * Legato Systems, Inc. (http://www.legato.com) have sponsored Anton - * Altaparmakov to develop NTFS on Linux since June 2001. - * - * Copyright (C) 1995-1997, 1999 Martin von Löwis - * Copyright (C) 1996 Richard Russon - * Copyright (C) 1996-1997 Régis Duchesne - * Copyright (C) 2000-2001, Anton Altaparmakov (AIA) - */ - -#include -#include -#include "ntfstypes.h" -#include "struct.h" -#include "util.h" -#include "inode.h" -#include "super.h" -#include "dir.h" -#include "support.h" -#include "macros.h" -#include "sysctl.h" -#include "attr.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Forward declarations. */ -static struct inode_operations ntfs_dir_inode_operations; -static struct file_operations ntfs_dir_operations; - -#define ITEM_SIZE 2040 - -/* Io functions to user space. */ -static void ntfs_putuser(ntfs_io* dest, void *src, ntfs_size_t len) -{ - copy_to_user(dest->param, src, len); - dest->param += len; -} - -#ifdef CONFIG_NTFS_RW -struct ntfs_getuser_update_vm_s { - const char *user; - struct inode *ino; - loff_t off; -}; - -static void ntfs_getuser_update_vm(void *dest, ntfs_io *src, ntfs_size_t len) -{ - struct ntfs_getuser_update_vm_s *p = src->param; - - copy_from_user(dest, p->user, len); - p->user += len; - p->off += len; -} -#endif - -/* loff_t is 64 bit signed, so is cool. 
*/ -static ssize_t ntfs_read(struct file *filp, char *buf, size_t count,loff_t *off) -{ - int error; - ntfs_io io; - ntfs_attribute *attr; - ntfs_inode *ino = NTFS_LINO2NINO(filp->f_dentry->d_inode); - - /* Inode is not properly initialized. */ - if (!ino) - return -EINVAL; - ntfs_debug(DEBUG_OTHER, "ntfs_read %x, %Lx, %x ->", - (unsigned)ino->i_number, (unsigned long long)*off, - (unsigned)count); - attr = ntfs_find_attr(ino, ino->vol->at_data, NULL); - /* Inode has no unnamed data attribute. */ - if (!attr) { - ntfs_debug(DEBUG_OTHER, "ntfs_read: $DATA not found!\n"); - return -EINVAL; - } - if (attr->flags & ATTR_IS_ENCRYPTED) - return -EACCES; - /* Read the data. */ - io.fn_put = ntfs_putuser; - io.fn_get = 0; - io.param = buf; - io.size = count; - error = ntfs_read_attr(ino, ino->vol->at_data, NULL, *off, &io); - if (error && !io.size) { - ntfs_debug(DEBUG_OTHER, "ntfs_read: read_attr failed with " - "error %i, io size %u.\n", error, io.size); - return error; - } - *off += io.size; - ntfs_debug(DEBUG_OTHER, "ntfs_read: finished. read %u bytes.\n", - io.size); - return io.size; -} - -#ifdef CONFIG_NTFS_RW -static ssize_t ntfs_write(struct file *filp, const char *buf, size_t count, - loff_t *pos) -{ - int err; - struct inode *vfs_ino = filp->f_dentry->d_inode; - ntfs_inode *ntfs_ino = NTFS_LINO2NINO(vfs_ino); - ntfs_attribute *data; - ntfs_io io; - struct ntfs_getuser_update_vm_s param; - - if (!ntfs_ino) - return -EINVAL; - ntfs_debug(DEBUG_LINUX, "%s(): Entering for inode 0x%lx, *pos 0x%Lx, " - "count 0x%x.\n", __FUNCTION__, ntfs_ino->i_number, - *pos, count); - /* Allows to lock fs ro at any time. */ - if (vfs_ino->i_sb->s_flags & MS_RDONLY) - return -EROFS; - data = ntfs_find_attr(ntfs_ino, ntfs_ino->vol->at_data, NULL); - if (!data) - return -EINVAL; - /* Evaluating O_APPEND is the file system's job... 
*/ - if (filp->f_flags & O_APPEND) - *pos = vfs_ino->i_size; - if (!data->resident && *pos + count > data->allocated) { - err = ntfs_extend_attr(ntfs_ino, data, *pos + count); - if (err < 0) - return err; - } - param.user = buf; - param.ino = vfs_ino; - param.off = *pos; - io.fn_put = 0; - io.fn_get = ntfs_getuser_update_vm; - io.param = &param; - io.size = count; - io.do_read = 0; - err = ntfs_readwrite_attr(ntfs_ino, data, *pos, &io); - ntfs_debug(DEBUG_LINUX, "%s(): Returning %i\n", __FUNCTION__, -err); - if (!err) { - *pos += io.size; - if (*pos > vfs_ino->i_size) - vfs_ino->i_size = *pos; - mark_inode_dirty(vfs_ino); - return io.size; - } - return err; -} -#endif - -struct ntfs_filldir { - struct inode *dir; - filldir_t filldir; - unsigned int type; - u32 ph, pl; - void *dirent; - char *name; - int namelen; - int ret_code; -}; - -static int ntfs_printcb(ntfs_u8 *entry, void *param) -{ - unsigned long inum = NTFS_GETU64(entry) & 0xffffffffffff; - struct ntfs_filldir *nf = param; - u32 flags = NTFS_GETU32(entry + 0x48); - char show_sys_files = 0; - u8 name_len = NTFS_GETU8(entry + 0x50); - u8 name_type = NTFS_GETU8(entry + 0x51); - int err; - unsigned file_type; - - switch (nf->type) { - case ngt_dos: - /* Don't display long names. */ - if (!(name_type & 2)) - return 0; - break; - case ngt_nt: - /* Don't display short-only names. */ - if ((name_type & 3) == 2) - return 0; - break; - case ngt_posix: - break; - case ngt_full: - show_sys_files = 1; - break; - default: - BUG(); - } - err = ntfs_encodeuni(NTFS_INO2VOL(nf->dir), (ntfs_u16*)(entry + 0x52), - name_len, &nf->name, &nf->namelen); - if (err) { - ntfs_debug(DEBUG_OTHER, "%s(): Skipping unrepresentable " - "file.\n", __FUNCTION__); - err = 0; - goto err_ret; - } - if (!show_sys_files && inum < 0x10UL) { - ntfs_debug(DEBUG_OTHER, "%s(): Skipping system file (%s).\n", - __FUNCTION__, nf->name); - err = 0; - goto err_ret; - } - /* Do not return ".", as this is faked.
*/ - if (nf->namelen == 1 && nf->name[0] == '.') { - ntfs_debug(DEBUG_OTHER, "%s(): Skipping \".\"\n", __FUNCTION__); - err = 0; - goto err_ret; - } - nf->name[nf->namelen] = 0; - if (flags & 0x10000000) /* FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT */ - file_type = DT_DIR; - else - file_type = DT_REG; - ntfs_debug(DEBUG_OTHER, "%s(): Calling filldir for %s with " - "len %i, f_pos 0x%Lx, inode %lu, %s.\n", __FUNCTION__, - nf->name, nf->namelen, (loff_t)(nf->ph << 16) | nf->pl, - inum, file_type == DT_DIR ? "DT_DIR" : "DT_REG"); - /* - * Userspace side of filldir expects an off_t rather than an loff_t. - * And it also doesn't like the most significant bit being set as it - * then considers the value to be negative. Thus this implementation - * limits the number of index records to 32766, which should be plenty. - */ - err = nf->filldir(nf->dirent, nf->name, nf->namelen, - (loff_t)(nf->ph << 16) | nf->pl, inum, file_type); - if (err) - nf->ret_code = err; -err_ret: - nf->namelen = 0; - ntfs_free(nf->name); - nf->name = NULL; - return err; -} - -/* - * readdir returns '.', then '..', then the directory entries in sequence. - * As the root directory contains an entry for itself, '.' is not emulated for - * the root directory. - */ -static int ntfs_readdir(struct file* filp, void *dirent, filldir_t filldir) -{ - struct inode *dir = filp->f_dentry->d_inode; - int err; - struct ntfs_filldir cb; - - cb.ret_code = 0; - cb.pl = filp->f_pos & 0xffff; - cb.ph = (filp->f_pos >> 16) & 0x7fff; - filp->f_pos = (loff_t)(cb.ph << 16) | cb.pl; - ntfs_debug(DEBUG_OTHER, "%s(): Entering for inode %lu, f_pos 0x%Lx, " - "i_mode 0x%x, i_count %lu.\n", __FUNCTION__, - dir->i_ino, filp->f_pos, (unsigned int)dir->i_mode, - atomic_read(&dir->i_count)); - if (!cb.ph) { - /* Start of directory. Emulate "." and "..". */ - if (!cb.pl) { - ntfs_debug(DEBUG_OTHER, "%s(): Calling filldir for . 
" - "with len 1, f_pos 0x%Lx, inode %lu, " - "DT_DIR.\n", __FUNCTION__, filp->f_pos, - dir->i_ino); - cb.ret_code = filldir(dirent, ".", 1, filp->f_pos, - dir->i_ino, DT_DIR); - if (cb.ret_code) - goto done; - cb.pl++; - filp->f_pos = (loff_t)(cb.ph << 16) | cb.pl; - } - if (cb.pl == (u32)1) { - ntfs_debug(DEBUG_OTHER, "%s(): Calling filldir for .. " - "with len 2, f_pos 0x%Lx, inode %lu, " - "DT_DIR.\n", __FUNCTION__, filp->f_pos, - filp->f_dentry->d_parent->d_inode->i_ino); - cb.ret_code = filldir(dirent, "..", 2, filp->f_pos, - filp->f_dentry->d_parent->d_inode->i_ino, - DT_DIR); - if (cb.ret_code) - goto done; - cb.pl++; - filp->f_pos = (loff_t)(cb.ph << 16) | cb.pl; - } - } else if (cb.ph >= 0x7fff) - /* End of directory. */ - goto done; - cb.dir = dir; - cb.filldir = filldir; - cb.dirent = dirent; - cb.type = NTFS_INO2VOL(dir)->ngt; - do { - ntfs_debug(DEBUG_OTHER, "%s(): Looking for next file using " - "ntfs_getdir_unsorted(), f_pos 0x%Lx.\n", - __FUNCTION__, (loff_t)(cb.ph << 16) | cb.pl); - err = ntfs_getdir_unsorted(NTFS_LINO2NINO(dir), &cb.ph, &cb.pl, - ntfs_printcb, &cb); - } while (!err && !cb.ret_code && cb.ph < 0x7fff); - filp->f_pos = (loff_t)(cb.ph << 16) | cb.pl; - ntfs_debug(DEBUG_OTHER, "%s(): After ntfs_getdir_unsorted()" - " calls, f_pos 0x%Lx.\n", __FUNCTION__, filp->f_pos); - if (!err) { -done: -#ifdef DEBUG - if (!cb.ret_code) - ntfs_debug(DEBUG_OTHER, "%s(): EOD, f_pos 0x%Lx, " - "returning 0.\n", __FUNCTION__, - filp->f_pos); - else - ntfs_debug(DEBUG_OTHER, "%s(): filldir returned %i, " - "returning 0, f_pos 0x%Lx.\n", - __FUNCTION__, cb.ret_code, filp->f_pos); -#endif - return 0; - } - ntfs_debug(DEBUG_OTHER, "%s(): Returning %i, f_pos 0x%Lx.\n", - __FUNCTION__, err, filp->f_pos); - return err; -} - -/* Copied from vfat driver. 
*/ -static int simple_getbool(char *s, int *setval) -{ - if (s) { - if (!strcmp(s, "1") || !strcmp(s, "yes") || !strcmp(s, "true")) - *setval = 1; - else if (!strcmp(s, "0") || !strcmp(s, "no") || - !strcmp(s, "false")) - *setval = 0; - else - return 0; - } else - *setval = 1; - return 1; -} - -/* - * This needs to be outside parse_options() otherwise a remount will reset - * these unintentionally. - */ -static void init_ntfs_super_block(ntfs_volume* vol) -{ - vol->uid = vol->gid = 0; - vol->umask = 0077; - vol->ngt = ngt_nt; - vol->nls_map = (void*)-1; - vol->mft_zone_multiplier = -1; -} - -/* Parse the (re)mount options. */ -static int parse_options(ntfs_volume *vol, char *opt) -{ - char *value; /* Defaults if not specified and !remount. */ - ntfs_uid_t uid = -1; /* 0, root user only */ - ntfs_gid_t gid = -1; /* 0, root user only */ - int umask = -1; /* 0077, owner access only */ - unsigned int ngt = -1; /* ngt_nt */ - void *nls_map = NULL; /* Try to load the default NLS. */ - int use_utf8 = -1; /* If no NLS specified and loading the default - NLS failed use utf8. 
*/ - int mft_zone_mul = -1; /* 1 */ - - if (!opt) - goto done; - for (opt = strtok(opt, ","); opt; opt = strtok(NULL, ",")) { - if ((value = strchr(opt, '=')) != NULL) - *value ++= '\0'; - if (strcmp(opt, "uid") == 0) { - if (!value || !*value) - goto needs_arg; - uid = simple_strtoul(value, &value, 0); - if (*value) { - printk(KERN_ERR "NTFS: uid invalid argument\n"); - return 0; - } - } else if (strcmp(opt, "gid") == 0) { - if (!value || !*value) - goto needs_arg; - gid = simple_strtoul(value, &value, 0); - if (*value) { - printk(KERN_ERR "NTFS: gid invalid argument\n"); - return 0; - } - } else if (strcmp(opt, "umask") == 0) { - if (!value || !*value) - goto needs_arg; - umask = simple_strtoul(value, &value, 0); - if (*value) { - printk(KERN_ERR "NTFS: umask invalid " - "argument\n"); - return 0; - } - } else if (strcmp(opt, "mft_zone_multiplier") == 0) { - unsigned long ul; - - if (!value || !*value) - goto needs_arg; - ul = simple_strtoul(value, &value, 0); - if (*value) { - printk(KERN_ERR "NTFS: mft_zone_multiplier " - "invalid argument\n"); - return 0; - } - if (ul >= 1 && ul <= 4) - mft_zone_mul = ul; - else { - mft_zone_mul = 1; - printk(KERN_WARNING "NTFS: mft_zone_multiplier " - "out of range. Setting to 1.\n"); - } - } else if (strcmp(opt, "posix") == 0) { - int val; - if (!value || !*value) - goto needs_arg; - if (!simple_getbool(value, &val)) - goto needs_bool; - ngt = val ? ngt_posix : ngt_nt; - } else if (strcmp(opt, "show_sys_files") == 0) { - int val = 0; - if (!value || !*value) - val = 1; - else if (!simple_getbool(value, &val)) - goto needs_bool; - ngt = val ? 
ngt_full : ngt_nt; - } else if (strcmp(opt, "iocharset") == 0) { - if (!value || !*value) - goto needs_arg; - nls_map = load_nls(value); - if (!nls_map) { - printk(KERN_ERR "NTFS: charset not found"); - return 0; - } - } else if (strcmp(opt, "utf8") == 0) { - int val = 0; - if (!value || !*value) - val = 1; - else if (!simple_getbool(value, &val)) - goto needs_bool; - use_utf8 = val; - } else { - printk(KERN_ERR "NTFS: unkown option '%s'\n", opt); - return 0; - } - } -done: - if (use_utf8 == -1) { - /* utf8 was not specified at all. */ - if (!nls_map) { - /* - * No NLS was specified. If first mount, load the - * default NLS, otherwise don't change the NLS setting. - */ - if (vol->nls_map == (void*)-1) - vol->nls_map = load_nls_default(); - } else { - /* If an NLS was already loaded, unload it first. */ - if (vol->nls_map && vol->nls_map != (void*)-1) - unload_nls(vol->nls_map); - /* Use the specified NLS. */ - vol->nls_map = nls_map; - } - } else { - /* utf8 was specified. */ - if (use_utf8 && nls_map) { - unload_nls(nls_map); - printk(KERN_ERR "NTFS: utf8 cannot be combined with " - "iocharset.\n"); - return 0; - } - /* If an NLS was already loaded, unload it first. */ - if (vol->nls_map && vol->nls_map != (void*)-1) - unload_nls(vol->nls_map); - if (!use_utf8) { - /* utf8 was specified as false. */ - if (!nls_map) - /* No NLS was specified, load the default. */ - vol->nls_map = load_nls_default(); - else - /* Use the specified NLS. */ - vol->nls_map = nls_map; - } else - /* utf8 was specified as true. */ - vol->nls_map = NULL; - } - if (uid != -1) - vol->uid = uid; - if (gid != -1) - vol->gid = gid; - if (umask != -1) - vol->umask = (ntmode_t)umask; - if (ngt != -1) - vol->ngt = ngt; - if (mft_zone_mul != -1) { - /* mft_zone_multiplier was specified. */ - if (vol->mft_zone_multiplier != -1) { - /* This is a remount, ignore a change and warn user. 
*/ - if (vol->mft_zone_multiplier != mft_zone_mul) - printk(KERN_WARNING "NTFS: Ignoring changes in " - "mft_zone_multiplier on " - "remount. If you want to " - "change this you need to " - "umount and mount again.\n"); - } else - /* Use the specified multiplier. */ - vol->mft_zone_multiplier = mft_zone_mul; - } else if (vol->mft_zone_multiplier == -1) - /* No multiplier specified and first mount, so set default. */ - vol->mft_zone_multiplier = 1; - return 1; -needs_arg: - printk(KERN_ERR "NTFS: %s needs an argument", opt); - return 0; -needs_bool: - printk(KERN_ERR "NTFS: %s needs boolean argument", opt); - return 0; -} - -static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *d) -{ - struct inode *res = 0; - char *item = 0; - ntfs_iterate_s walk; - int err; - - ntfs_debug(DEBUG_NAME1, "%s(): Looking up %s in directory ino 0x%x.\n", - __FUNCTION__, d->d_name.name, (unsigned)dir->i_ino); - walk.name = NULL; - walk.namelen = 0; - /* Convert to wide string. */ - err = ntfs_decodeuni(NTFS_INO2VOL(dir), (char*)d->d_name.name, - d->d_name.len, &walk.name, &walk.namelen); - if (err) - goto err_ret; - item = ntfs_malloc(ITEM_SIZE); - if (!item) { - err = -ENOMEM; - goto err_ret; - } - /* ntfs_getdir will place the directory entry into item, and the first - * long long is the MFT record number. */ - walk.type = BY_NAME; - walk.dir = NTFS_LINO2NINO(dir); - walk.result = item; - if (ntfs_getdir_byname(&walk)) - res = iget(dir->i_sb, NTFS_GETU32(item)); - d_add(d, res); - ntfs_free(item); - ntfs_free(walk.name); - /* Always return success, the dcache will handle negative entries. 
*/ - return NULL; -err_ret: - ntfs_free(walk.name); - return ERR_PTR(err); -} - -static struct file_operations ntfs_file_operations = { - llseek: generic_file_llseek, - read: ntfs_read, -#ifdef CONFIG_NTFS_RW - write: ntfs_write, -#endif - open: generic_file_open, -}; - -static struct inode_operations ntfs_inode_operations; - -#ifdef CONFIG_NTFS_RW -static int ntfs_create(struct inode* dir, struct dentry *d, int mode) -{ - struct inode *r = 0; - ntfs_inode *ino = 0; - ntfs_volume *vol; - int error = 0; - ntfs_attribute *si; - - r = new_inode(dir->i_sb); - if (!r) { - error = -ENOMEM; - goto fail; - } - ntfs_debug(DEBUG_OTHER, "ntfs_create %s\n", d->d_name.name); - vol = NTFS_INO2VOL(dir); - ino = NTFS_LINO2NINO(r); - error = ntfs_alloc_file(NTFS_LINO2NINO(dir), ino, (char*)d->d_name.name, - d->d_name.len); - if (error) { - ntfs_error("ntfs_alloc_file FAILED: error = %i", error); - goto fail; - } - /* Not doing this one was causing a huge amount of corruption! Now the - * bugger bytes the dust! (-8 (AIA) */ - r->i_ino = ino->i_number; - error = ntfs_update_inode(ino); - if (error) - goto fail; - error = ntfs_update_inode(NTFS_LINO2NINO(dir)); - if (error) - goto fail; - r->i_uid = vol->uid; - r->i_gid = vol->gid; - /* FIXME: dirty? dev? */ - /* Get the file modification times from the standard information. 
*/ - si = ntfs_find_attr(ino, vol->at_standard_information, NULL); - if (si) { - char *attr = si->d.data; - r->i_atime = ntfs_ntutc2unixutc(NTFS_GETU64(attr + 0x18)); - r->i_ctime = ntfs_ntutc2unixutc(NTFS_GETU64(attr)); - r->i_mtime = ntfs_ntutc2unixutc(NTFS_GETU64(attr + 8)); - } - /* It's not a directory */ - r->i_op = &ntfs_inode_operations; - r->i_fop = &ntfs_file_operations; - r->i_mode = S_IFREG | S_IRUGO; -#ifdef CONFIG_NTFS_RW - r->i_mode |= S_IWUGO; -#endif - r->i_mode &= ~vol->umask; - insert_inode_hash(r); - d_instantiate(d, r); - return 0; - fail: - if (r) - iput(r); - return error; -} - -static int _linux_ntfs_mkdir(struct inode *dir, struct dentry* d, int mode) -{ - int error; - struct inode *r = 0; - ntfs_volume *vol; - ntfs_inode *ino; - ntfs_attribute *si; - - ntfs_debug (DEBUG_DIR1, "mkdir %s in %x\n", d->d_name.name, dir->i_ino); - error = -ENAMETOOLONG; - if (d->d_name.len > /* FIXME: */ 255) - goto out; - error = -EIO; - r = new_inode(dir->i_sb); - if (!r) - goto out; - vol = NTFS_INO2VOL(dir); - ino = NTFS_LINO2NINO(r); - error = ntfs_mkdir(NTFS_LINO2NINO(dir), d->d_name.name, d->d_name.len, - ino); - if (error) - goto out; - /* Not doing this one was causing a huge amount of corruption! Now the - * bugger bytes the dust! (-8 (AIA) */ - r->i_ino = ino->i_number; - r->i_uid = vol->uid; - r->i_gid = vol->gid; - si = ntfs_find_attr(ino, vol->at_standard_information, NULL); - if (si) { - char *attr = si->d.data; - r->i_atime = ntfs_ntutc2unixutc(NTFS_GETU64(attr + 0x18)); - r->i_ctime = ntfs_ntutc2unixutc(NTFS_GETU64(attr)); - r->i_mtime = ntfs_ntutc2unixutc(NTFS_GETU64(attr + 8)); - } - /* It's a directory. 
*/ - r->i_op = &ntfs_dir_inode_operations; - r->i_fop = &ntfs_dir_operations; - r->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; -#ifdef CONFIG_NTFS_RW - r->i_mode |= S_IWUGO; -#endif - r->i_mode &= ~vol->umask; - - insert_inode_hash(r); - d_instantiate(d, r); - error = 0; - out: - ntfs_debug (DEBUG_DIR1, "mkdir returns %d\n", error); - return error; -} -#endif - -static struct file_operations ntfs_dir_operations = { - read: generic_read_dir, - readdir: ntfs_readdir, -}; - -static struct inode_operations ntfs_dir_inode_operations = { - lookup: ntfs_lookup, -#ifdef CONFIG_NTFS_RW - create: ntfs_create, - mkdir: _linux_ntfs_mkdir, -#endif -}; - -/* ntfs_read_inode() is called by the Virtual File System (the kernel layer - * that deals with filesystems) when iget is called requesting an inode not - * already present in the inode table. Typically filesystems have separate - * inode_operations for directories, files and symlinks. */ -static void ntfs_read_inode(struct inode* inode) -{ - ntfs_volume *vol; - ntfs_inode *ino; - ntfs_attribute *data; - ntfs_attribute *si; - - vol = NTFS_INO2VOL(inode); - inode->i_mode = 0; - ntfs_debug(DEBUG_OTHER, "ntfs_read_inode 0x%lx\n", inode->i_ino); - switch (inode->i_ino) { - /* Those are loaded special files. 
*/ - case FILE_Mft: - if (!vol->mft_ino || ((vol->ino_flags & 1) == 0)) - goto sys_file_error; - ntfs_memcpy(&inode->u.ntfs_i, vol->mft_ino, sizeof(ntfs_inode)); - ino = vol->mft_ino; - vol->mft_ino = &inode->u.ntfs_i; - vol->ino_flags &= ~1; - ntfs_free(ino); - ino = vol->mft_ino; - ntfs_debug(DEBUG_OTHER, "Opening $MFT!\n"); - break; - case FILE_MftMirr: - if (!vol->mftmirr || ((vol->ino_flags & 2) == 0)) - goto sys_file_error; - ntfs_memcpy(&inode->u.ntfs_i, vol->mftmirr, sizeof(ntfs_inode)); - ino = vol->mftmirr; - vol->mftmirr = &inode->u.ntfs_i; - vol->ino_flags &= ~2; - ntfs_free(ino); - ino = vol->mftmirr; - ntfs_debug(DEBUG_OTHER, "Opening $MFTMirr!\n"); - break; - case FILE_BitMap: - if (!vol->bitmap || ((vol->ino_flags & 4) == 0)) - goto sys_file_error; - ntfs_memcpy(&inode->u.ntfs_i, vol->bitmap, sizeof(ntfs_inode)); - ino = vol->bitmap; - vol->bitmap = &inode->u.ntfs_i; - vol->ino_flags &= ~4; - ntfs_free(ino); - ino = vol->bitmap; - ntfs_debug(DEBUG_OTHER, "Opening $Bitmap!\n"); - break; - case FILE_LogFile ... FILE_AttrDef: - /* No need to log root directory accesses. */ - case FILE_Boot ... FILE_UpCase: - ntfs_debug(DEBUG_OTHER, "Opening system file %i!\n", - inode->i_ino); - default: - ino = &inode->u.ntfs_i; - if (!ino || ntfs_init_inode(ino, NTFS_INO2VOL(inode), - inode->i_ino)) - { - ntfs_debug(DEBUG_OTHER, "NTFS: Error loading inode " - "0x%x\n", (unsigned int)inode->i_ino); - return; - } - } - /* Set uid/gid from mount options */ - inode->i_uid = vol->uid; - inode->i_gid = vol->gid; - inode->i_nlink = 1; - /* Use the size of the data attribute as file size */ - data = ntfs_find_attr(ino, vol->at_data, NULL); - if (!data) - inode->i_size = 0; - else - inode->i_size = data->size; - /* Get the file modification times from the standard information. 
*/ - si = ntfs_find_attr(ino, vol->at_standard_information, NULL); - if (si) { - char *attr = si->d.data; - inode->i_atime = ntfs_ntutc2unixutc(NTFS_GETU64(attr + 0x18)); - inode->i_ctime = ntfs_ntutc2unixutc(NTFS_GETU64(attr)); - inode->i_mtime = ntfs_ntutc2unixutc(NTFS_GETU64(attr + 8)); - } - /* If it has an index root, it's a directory. */ - if (ntfs_find_attr(ino, vol->at_index_root, "$I30")) { - ntfs_attribute *at; - at = ntfs_find_attr(ino, vol->at_index_allocation, "$I30"); - inode->i_size = at ? at->size : 0; - inode->i_op = &ntfs_dir_inode_operations; - inode->i_fop = &ntfs_dir_operations; - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - } else { - inode->i_op = &ntfs_inode_operations; - inode->i_fop = &ntfs_file_operations; - inode->i_mode = S_IFREG | S_IRUGO; - } -#ifdef CONFIG_NTFS_RW - if (!data || !(data->flags & (ATTR_IS_COMPRESSED | ATTR_IS_ENCRYPTED))) - inode->i_mode |= S_IWUGO; -#endif - inode->i_mode &= ~vol->umask; - return; -sys_file_error: - ntfs_error("Critical error. Tried to call ntfs_read_inode() before we " - "have completed read_super() or VFS error.\n"); - // FIXME: Should we panic() at this stage? 
-} - -#ifdef CONFIG_NTFS_RW -static void ntfs_write_inode(struct inode *ino, int unused) -{ - lock_kernel(); - ntfs_debug(DEBUG_LINUX, "ntfs_write_inode 0x%x\n", ino->i_ino); - ntfs_update_inode(NTFS_LINO2NINO(ino)); - unlock_kernel(); -} -#endif - -static void _ntfs_clear_inode(struct inode *inode) -{ - ntfs_inode *ino; - ntfs_volume *vol; - - lock_kernel(); - ntfs_debug(DEBUG_OTHER, "_ntfs_clear_inode 0x%x\n", inode->i_ino); - vol = NTFS_INO2VOL(inode); - if (!vol) - ntfs_error("_ntfs_clear_inode: vol = NTFS_INO2VOL(inode) is " - "NULL.\n"); - switch (inode->i_ino) { - case FILE_Mft: - if (vol->mft_ino && ((vol->ino_flags & 1) == 0)) { - ino = (ntfs_inode*)ntfs_malloc(sizeof(ntfs_inode)); - ntfs_memcpy(ino, &inode->u.ntfs_i, sizeof(ntfs_inode)); - vol->mft_ino = ino; - vol->ino_flags |= 1; - goto unl_out; - } - break; - case FILE_MftMirr: - if (vol->mftmirr && ((vol->ino_flags & 2) == 0)) { - ino = (ntfs_inode*)ntfs_malloc(sizeof(ntfs_inode)); - ntfs_memcpy(ino, &inode->u.ntfs_i, sizeof(ntfs_inode)); - vol->mftmirr = ino; - vol->ino_flags |= 2; - goto unl_out; - } - break; - case FILE_BitMap: - if (vol->bitmap && ((vol->ino_flags & 4) == 0)) { - ino = (ntfs_inode*)ntfs_malloc(sizeof(ntfs_inode)); - ntfs_memcpy(ino, &inode->u.ntfs_i, sizeof(ntfs_inode)); - vol->bitmap = ino; - vol->ino_flags |= 4; - goto unl_out; - } - break; - /* Nothing. Just clear the inode and exit. */ - } - ntfs_clear_inode(&inode->u.ntfs_i); -unl_out: - unlock_kernel(); - return; -} - -/* Called when umounting a filesystem by do_umount() in fs/super.c. */ -static void ntfs_put_super(struct super_block *sb) -{ - ntfs_volume *vol; - - ntfs_debug(DEBUG_OTHER, "ntfs_put_super\n"); - vol = NTFS_SB2VOL(sb); - ntfs_release_volume(vol); - if (vol->nls_map) - unload_nls(vol->nls_map); - ntfs_debug(DEBUG_OTHER, "ntfs_put_super: done\n"); -} - -/* Called by the kernel when asking for stats. 
*/ -static int ntfs_statfs(struct super_block *sb, struct statfs *sf) -{ - struct inode *mft; - ntfs_volume *vol; - __s64 size; - int error; - - ntfs_debug(DEBUG_OTHER, "ntfs_statfs\n"); - vol = NTFS_SB2VOL(sb); - sf->f_type = NTFS_SUPER_MAGIC; - sf->f_bsize = vol->cluster_size; - error = ntfs_get_volumesize(NTFS_SB2VOL(sb), &size); - if (error) - return error; - sf->f_blocks = size; /* Volumesize is in clusters. */ - size = (__s64)ntfs_get_free_cluster_count(vol->bitmap); - /* Just say zero if the call failed. */ - if (size < 0LL) - size = 0; - sf->f_bfree = sf->f_bavail = size; - ntfs_debug(DEBUG_OTHER, "ntfs_statfs: calling mft = iget(sb, " - "FILE_Mft)\n"); - mft = iget(sb, FILE_Mft); - ntfs_debug(DEBUG_OTHER, "ntfs_statfs: iget(sb, FILE_Mft) returned " - "0x%x\n", mft); - if (!mft) - return -EIO; - sf->f_files = mft->i_size >> vol->mft_record_size_bits; - ntfs_debug(DEBUG_OTHER, "ntfs_statfs: calling iput(mft)\n"); - iput(mft); - /* Should be read from volume. */ - sf->f_namelen = 255; - return 0; -} - -/* Called when remounting a filesystem by do_remount_sb() in fs/super.c. */ -static int ntfs_remount_fs(struct super_block *sb, int *flags, char *options) -{ - if (!parse_options(NTFS_SB2VOL(sb), options)) - return -EINVAL; - return 0; -} - -/* Define the super block operation that are implemented */ -static struct super_operations ntfs_super_operations = { - read_inode: ntfs_read_inode, -#ifdef CONFIG_NTFS_RW - write_inode: ntfs_write_inode, -#endif - put_super: ntfs_put_super, - statfs: ntfs_statfs, - remount_fs: ntfs_remount_fs, - clear_inode: _ntfs_clear_inode, -}; - -/** - * is_boot_sector_ntfs - check an NTFS boot sector for validity - * @b: buffer containing bootsector to check - * - * Check whether @b contains a valid NTFS boot sector. - * Return 1 if @b is a valid NTFS bootsector or 0 if not. - */ -static int is_boot_sector_ntfs(ntfs_u8 *b) -{ - ntfs_u32 i; - - /* FIXME: We don't use checksumming yet as NT4(SP6a) doesn't either... 
- * But we might as well have the code ready to do it. (AIA) */ -#if 0 - /* Calculate the checksum. */ - if (b < b + 0x50) { - ntfs_u32 *u; - ntfs_u32 *bi = (ntfs_u32 *)(b + 0x50); - - for (u = bi, i = 0; u < bi; ++u) - i += NTFS_GETU32(*u); - } -#endif - /* Check magic is "NTFS " */ - if (b[3] != 0x4e) goto not_ntfs; - if (b[4] != 0x54) goto not_ntfs; - if (b[5] != 0x46) goto not_ntfs; - if (b[6] != 0x53) goto not_ntfs; - for (i = 7; i < 0xb; ++i) - if (b[i] != 0x20) goto not_ntfs; - /* Check bytes per sector value is between 512 and 4096. */ - if (b[0xb] != 0) goto not_ntfs; - if (b[0xc] > 0x10) goto not_ntfs; - /* Check sectors per cluster value is valid. */ - switch (b[0xd]) { - case 1: case 2: case 4: case 8: case 16: - case 32: case 64: case 128: - break; - default: - goto not_ntfs; - } - /* Check reserved sectors value and four other fields are zero. */ - for (i = 0xe; i < 0x15; ++i) - if (b[i] != 0) goto not_ntfs; - if (b[0x16] != 0) goto not_ntfs; - if (b[0x17] != 0) goto not_ntfs; - for (i = 0x20; i < 0x24; ++i) - if (b[i] != 0) goto not_ntfs; - /* Check clusters per file record segment value is valid. */ - if (b[0x40] < 0xe1 || b[0x40] > 0xf7) { - switch (b[0x40]) { - case 1: case 2: case 4: case 8: case 16: case 32: case 64: - break; - default: - goto not_ntfs; - } - } - /* Check clusters per index block value is valid. */ - if (b[0x44] < 0xe1 || b[0x44] > 0xf7) { - switch (b[0x44]) { - case 1: case 2: case 4: case 8: case 16: case 32: case 64: - break; - default: - goto not_ntfs; - } - } - return 1; -not_ntfs: - return 0; -} - -/* Called to mount a filesystem by read_super() in fs/super.c. - * Return a super block, the main structure of a filesystem. - * - * NOTE : Don't store a pointer to an option, as the page containing the - * options is freed after ntfs_read_super() returns. - * - * NOTE : A context switch can happen in kernel code only if the code blocks - * (= calls schedule() in kernel/sched.c). 
*/ -struct super_block *ntfs_read_super(struct super_block *sb, void *options, - int silent) -{ - ntfs_volume *vol; - struct buffer_head *bh; - int i, to_read, blocksize; - - ntfs_debug(DEBUG_OTHER, "ntfs_read_super\n"); - vol = NTFS_SB2VOL(sb); - init_ntfs_super_block(vol); - if (!parse_options(vol, (char*)options)) - goto ntfs_read_super_vol; - blocksize = get_hardsect_size(sb->s_dev); - if (blocksize < 512) - blocksize = 512; - if (set_blocksize(sb->s_dev, blocksize) < 0) { - ntfs_error("Unable to set blocksize %d.\n", blocksize); - goto ntfs_read_super_vol; - } - sb->s_blocksize = blocksize; - /* Read the super block (boot block). */ - if (!(bh = sb_bread(sb, 0))) { - ntfs_error("Reading super block failed\n"); - goto ntfs_read_super_unl; - } - ntfs_debug(DEBUG_OTHER, "Done reading boot block\n"); - /* Check for valid 'NTFS' boot sector. */ - if (!is_boot_sector_ntfs(bh->b_data)) { - ntfs_debug(DEBUG_OTHER, "Not a NTFS volume\n"); - bforget(bh); - goto ntfs_read_super_unl; - } - ntfs_debug(DEBUG_OTHER, "Going to init volume\n"); - if (ntfs_init_volume(vol, bh->b_data) < 0) { - ntfs_debug(DEBUG_OTHER, "Init volume failed.\n"); - bforget(bh); - goto ntfs_read_super_unl; - } - ntfs_debug(DEBUG_OTHER, "$Mft at cluster 0x%lx\n", vol->mft_lcn); - brelse(bh); - NTFS_SB(vol) = sb; - if (vol->cluster_size > PAGE_SIZE) { - ntfs_error("Partition cluster size is not supported yet (it " - "is > max kernel blocksize).\n"); - goto ntfs_read_super_unl; - } - ntfs_debug(DEBUG_OTHER, "Done to init volume\n"); - /* Inform the kernel that a device block is a NTFS cluster. */ - sb->s_blocksize = vol->cluster_size; - sb->s_blocksize_bits = vol->cluster_size_bits; - if (blocksize != vol->cluster_size && - set_blocksize(sb->s_dev, sb->s_blocksize) < 0) { - ntfs_error("Cluster size too small for device.\n"); - goto ntfs_read_super_unl; - } - ntfs_debug(DEBUG_OTHER, "set_blocksize\n"); - /* Allocate an MFT record (MFT record can be smaller than a cluster). 
*/ - i = vol->cluster_size; - if (i < vol->mft_record_size) - i = vol->mft_record_size; - if (!(vol->mft = ntfs_malloc(i))) - goto ntfs_read_super_unl; - - /* Read at least the MFT record for $Mft. */ - to_read = vol->mft_clusters_per_record; - if (to_read < 1) - to_read = 1; - for (i = 0; i < to_read; i++) { - if (!(bh = sb_bread(sb, vol->mft_lcn + i))) { - ntfs_error("Could not read $Mft record 0\n"); - goto ntfs_read_super_mft; - } - ntfs_memcpy(vol->mft + ((__s64)i << vol->cluster_size_bits), - bh->b_data, vol->cluster_size); - brelse(bh); - ntfs_debug(DEBUG_OTHER, "Read cluster 0x%x\n", - vol->mft_lcn + i); - } - /* Check and fixup this MFT record */ - if (!ntfs_check_mft_record(vol, vol->mft)){ - ntfs_error("Invalid $Mft record 0\n"); - goto ntfs_read_super_mft; - } - /* Inform the kernel about which super operations are available. */ - sb->s_op = &ntfs_super_operations; - sb->s_magic = NTFS_SUPER_MAGIC; - sb->s_maxbytes = MAX_LFS_FILESIZE; - ntfs_debug(DEBUG_OTHER, "Reading special files\n"); - if (ntfs_load_special_files(vol)) { - ntfs_error("Error loading special files\n"); - goto ntfs_read_super_mft; - } - ntfs_debug(DEBUG_OTHER, "Getting RootDir\n"); - /* Get the root directory. */ - if (!(sb->s_root = d_alloc_root(iget(sb, FILE_root)))) { - ntfs_error("Could not get root dir inode\n"); - goto ntfs_read_super_mft; - } -ntfs_read_super_ret: - ntfs_debug(DEBUG_OTHER, "read_super: done\n"); - return sb; -ntfs_read_super_mft: - ntfs_free(vol->mft); -ntfs_read_super_unl: -ntfs_read_super_vol: - sb = NULL; - goto ntfs_read_super_ret; -} - -/* Define the filesystem */ -static DECLARE_FSTYPE_DEV(ntfs_fs_type, "ntfs", ntfs_read_super); - -static int __init init_ntfs_fs(void) -{ - /* Comment this if you trust klogd. 
There are reasons not to trust it */ -#if defined(DEBUG) && !defined(MODULE) - console_verbose(); -#endif - printk(KERN_NOTICE "NTFS driver v" NTFS_VERSION " [Flags: R/" -#ifdef CONFIG_NTFS_RW - "W" -#else - "O" -#endif -#ifdef DEBUG - " DEBUG" -#endif -#ifdef MODULE - " MODULE" -#endif - "]\n"); - SYSCTL(1); - ntfs_debug(DEBUG_OTHER, "registering %s\n", ntfs_fs_type.name); - /* Add this filesystem to the kernel table of filesystems. */ - return register_filesystem(&ntfs_fs_type); -} - -static void __exit exit_ntfs_fs(void) -{ - SYSCTL(0); - ntfs_debug(DEBUG_OTHER, "unregistering %s\n", ntfs_fs_type.name); - unregister_filesystem(&ntfs_fs_type); -} - -EXPORT_NO_SYMBOLS; -/* - * Not strictly true. The driver was written originally by Martin von Löwis. - * I am just maintaining and rewriting it. - */ -MODULE_AUTHOR("Anton Altaparmakov "); -MODULE_DESCRIPTION("Linux NTFS driver"); -MODULE_LICENSE("GPL"); -#ifdef DEBUG -MODULE_PARM(ntdebug, "i"); -MODULE_PARM_DESC(ntdebug, "Debug level"); -#endif - -module_init(init_ntfs_fs) -module_exit(exit_ntfs_fs) - diff -urN linux-2.4.24-vanilla/fs/ntfs/inode.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/inode.c --- linux-2.4.24-vanilla/fs/ntfs/inode.c 2003-11-28 18:26:21.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/inode.c 2004-01-21 14:28:25.000000000 +0000 @@ -1,2322 +1,1987 @@ -/* - * inode.c +/** + * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2004 Anton Altaparmakov * - * Copyright (C) 1995-1999 Martin von Löwis - * Copyright (C) 1996 Albert D. 
Cahalan - * Copyright (C) 1996-1997 Régis Duchesne - * Copyright (C) 1998 Joseph Malicki - * Copyright (C) 1999 Steve Dodd - * Copyright (C) 2000-2001 Anton Altaparmakov (AIA) + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "ntfstypes.h" -#include "ntfsendian.h" -#include "struct.h" -#include "inode.h" -#include -#include "macros.h" -#include "attr.h" -#include "super.h" -#include "dir.h" -#include "support.h" -#include "util.h" -#include + +#include #include +#include -typedef struct { - int recno; - unsigned char *record; -} ntfs_mft_record; - -typedef struct { - int size; - int count; - ntfs_mft_record *records; -} ntfs_disk_inode; - -static void ntfs_fill_mft_header(ntfs_u8 *mft, int rec_size, int seq_no, - int links, int flags) -{ - int fixup_ofs = 0x2a; - int fixup_cnt = rec_size / NTFS_SECTOR_SIZE + 1; - int attr_ofs = (fixup_ofs + 2 * fixup_cnt + 7) & ~7; - - NTFS_PUTU32(mft + 0x00, 0x454c4946); /* FILE */ - NTFS_PUTU16(mft + 0x04, fixup_ofs); /* Offset to fixup. */ - NTFS_PUTU16(mft + 0x06, fixup_cnt); /* Number of fixups. */ - NTFS_PUTU64(mft + 0x08, 0); /* Logical sequence number. */ - NTFS_PUTU16(mft + 0x10, seq_no); /* Sequence number. 
*/ - NTFS_PUTU16(mft + 0x12, links); /* Hard link count. */ - NTFS_PUTU16(mft + 0x14, attr_ofs); /* Offset to attributes. */ - NTFS_PUTU16(mft + 0x16, flags); /* Flags: 1 = In use, - 2 = Directory. */ - NTFS_PUTU32(mft + 0x18, attr_ofs + 8); /* Bytes in use. */ - NTFS_PUTU32(mft + 0x1c, rec_size); /* Total allocated size. */ - NTFS_PUTU64(mft + 0x20, 0); /* Base mft record. */ - NTFS_PUTU16(mft + 0x28, 0); /* Next attr instance. */ - NTFS_PUTU16(mft + fixup_ofs, 1); /* Fixup word. */ - NTFS_PUTU32(mft + attr_ofs, (__u32)-1); /* End of attributes marker. */ -} - -/* - * Search in an inode an attribute by type and name. - * FIXME: Check that when attributes are inserted all attribute list - * attributes are expanded otherwise need to modify this function to deal - * with attribute lists. (AIA) +#include "ntfs.h" +#include "dir.h" +#include "inode.h" +#include "attrib.h" + +/** + * ntfs_test_inode - compare two (possibly fake) inodes for equality + * @vi: vfs inode which to test + * @na: ntfs attribute which is being tested with + * + * Compare the ntfs attribute embedded in the ntfs specific part of the vfs + * inode @vi for equality with the ntfs attribute @na. + * + * If searching for the normal file/directory inode, set @na->type to AT_UNUSED. + * @na->name and @na->name_len are then ignored. + * + * Return 1 if the attributes match and 0 if not. + * + * NOTE: This function runs with the inode_lock spin lock held so it is not + * allowed to sleep. */ -ntfs_attribute *ntfs_find_attr(ntfs_inode *ino, int type, char *name) +static int ntfs_test_inode(struct inode *vi, unsigned long ino, ntfs_attr *na) { - int i; - - if (!ino) { - ntfs_error("ntfs_find_attr: NO INODE!\n"); - return 0; - } - for (i = 0; i < ino->attr_count; i++) { - if (type < ino->attrs[i].type) + ntfs_inode *ni; + +// if (vi->i_ino != na->mft_no) +// return 0; + ni = NTFS_I(vi); + /* If !NInoAttr(ni), @vi is a normal file or directory inode. 
*/ + if (likely(!NInoAttr(ni))) { + /* If not looking for a normal inode this is a mismatch. */ + if (unlikely(na->type != AT_UNUSED)) + return 0; + } else { + /* A fake inode describing an attribute. */ + if (ni->type != na->type) + return 0; + if (ni->name_len != na->name_len) + return 0; + if (na->name_len && memcmp(ni->name, na->name, + na->name_len * sizeof(uchar_t))) return 0; - if (type == ino->attrs[i].type) { - if (!name) { - if (!ino->attrs[i].name) - return ino->attrs + i; - } else if (ino->attrs[i].name && - !ntfs_ua_strncmp(ino->attrs[i].name, name, - strlen(name))) - return ino->attrs + i; - } } - return 0; + /* Match! */ + return 1; } -/* - * Insert all attributes from the record mftno of the MFT in the inode ino. - * If mftno is a base mft record we abort as soon as we find the attribute - * list, but only on the first pass. We will get called later when the attribute - * list attribute is being parsed so we need to distinguish the two cases. - * FIXME: We should be performing structural consistency checks. (AIA) - * Return 0 on success or -errno on error. +/** + * ntfs_init_locked_inode - initialize an inode + * @vi: vfs inode to initialize + * @na: ntfs attribute which to initialize @vi to + * + * Initialize the vfs inode @vi with the values from the ntfs attribute @na in + * order to enable ntfs_test_inode() to do its work. + * + * If initializing the normal file/directory inode, set @na->type to AT_UNUSED. + * In that case, @na->name and @na->name_len should be set to NULL and 0, + * respectively. Although that is not strictly necessary as + * ntfs_read_inode_locked() will fill them in later. + * + * Return 0 on success and -errno on error. + * + * NOTE: This function runs with the inode_lock spin lock held so it is not + * allowed to sleep. (Hence the GFP_ATOMIC allocation.) 
*/ -static int ntfs_insert_mft_attributes(ntfs_inode* ino, char *mft, int mftno) +static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) { - int i, error, type, len, present = 0; - char *it; + ntfs_inode *ni = NTFS_I(vi); - /* Check for duplicate extension record. */ - for(i = 0; i < ino->record_count; i++) - if (ino->records[i] == mftno) { - if (i) - return 0; - present = 1; - break; - } - if (!present) { - /* (re-)allocate space if necessary. */ - if (ino->record_count % 8 == 0) { - int *new; - - new = ntfs_malloc((ino->record_count + 8) * - sizeof(int)); - if (!new) - return -ENOMEM; - if (ino->records) { - for (i = 0; i < ino->record_count; i++) - new[i] = ino->records[i]; - ntfs_free(ino->records); - } - ino->records = new; - } - ino->records[ino->record_count] = mftno; - ino->record_count++; - } - it = mft + NTFS_GETU16(mft + 0x14); /* mft->attrs_offset */ - do { - type = NTFS_GETU32(it); - len = NTFS_GETU32(it + 4); - if (type != -1) { - error = ntfs_insert_attribute(ino, it); - if (error) - return error; - } - /* If we have just processed the attribute list and this is - * the first time we are parsing this (base) mft record then we - * are done so that the attribute list gets parsed before the - * entries in the base mft record. Otherwise we run into - * problems with encountering attributes out of order and when - * this happens with different attribute extents we die. )-: - * This way we are ok as the attribute list is always sorted - * fully and correctly. (-: */ - if (type == 0x20 && !present) - return 0; - it += len; - } while (type != -1); /* Attribute listing ends with type -1. */ - return 0; -} +// vi->i_ino = na->mft_no; -/* - * Insert a single specific attribute from the record mftno of the MFT in the - * inode ino. We disregard the attribute list assuming we have already parsed - * it. - * FIXME: We should be performing structural consistency checks. (AIA) - * Return 0 on success or -errno on error. 
- */ -static int ntfs_insert_mft_attribute(ntfs_inode* ino, int mftno, - ntfs_u8 *attr) -{ - int i, error, present = 0; + ni->type = na->type; + if (na->type == AT_INDEX_ALLOCATION) + NInoSetMstProtected(ni); - /* Check for duplicate extension record. */ - for(i = 0; i < ino->record_count; i++) - if (ino->records[i] == mftno) { - present = 1; - break; - } - if (!present) { - /* (re-)allocate space if necessary. */ - if (ino->record_count % 8 == 0) { - int *new; - - new = ntfs_malloc((ino->record_count + 8) * - sizeof(int)); - if (!new) - return -ENOMEM; - if (ino->records) { - for (i = 0; i < ino->record_count; i++) - new[i] = ino->records[i]; - ntfs_free(ino->records); - } - ino->records = new; - } - ino->records[ino->record_count] = mftno; - ino->record_count++; - } - if (NTFS_GETU32(attr) == -1) { - ntfs_debug(DEBUG_FILE3, "ntfs_insert_mft_attribute: attribute " - "type is -1.\n"); + ni->name = na->name; + ni->name_len = na->name_len; + + /* If initializing a normal inode, we are done. */ + if (likely(na->type == AT_UNUSED)) return 0; + + /* It is a fake inode. */ + NInoSetAttr(ni); + + /* + * We have I30 global constant as an optimization as it is the name + * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC + * allocation but that is ok. And most attributes are unnamed anyway, + * thus the fraction of named attributes with name != I30 is actually + * absolutely tiny. The terminator is indexed in characters, not bytes. + */ + if (na->name && na->name_len && na->name != I30) { + unsigned int i; + + i = na->name_len * sizeof(uchar_t); + ni->name = (uchar_t*)kmalloc(i + sizeof(uchar_t), GFP_ATOMIC); + if (!ni->name) + return -ENOMEM; + memcpy(ni->name, na->name, i); + ni->name[na->name_len] = cpu_to_le16('\0'); } - error = ntfs_insert_attribute(ino, attr); - if (error) - return error; return 0; } -/* Read and insert all the attributes of an 'attribute list' attribute. - * Return the number of remaining bytes in *plen.
*/ -static int parse_attributes(ntfs_inode *ino, ntfs_u8 *alist, int *plen) -{ - ntfs_u8 *mft, *attr; - int mftno, l, error; - int last_mft = -1; - int len = *plen; - int tries = 0; - - if (!ino->attr) { - ntfs_error("parse_attributes: called on inode 0x%x without a " - "loaded base mft record.\n", ino->i_number); - return -EINVAL; - } - mft = ntfs_malloc(ino->vol->mft_record_size); - if (!mft) - return -ENOMEM; - while (len > 8) { - l = NTFS_GETU16(alist + 4); - if (l > len) - break; - /* Process an attribute description. */ - mftno = NTFS_GETU32(alist + 0x10); - /* FIXME: The mft reference (alist + 0x10) is __s64. - * - Not a problem unless we encounter a huge partition. - * - Should be consistency checking the sequence numbers - * though! This should maybe happen in - * ntfs_read_mft_record() itself and a hotfix could - * then occur there or the user notified to run - * ntfsck. (AIA) */ - if (mftno != ino->i_number && mftno != last_mft) { -continue_after_loading_mft_data: - last_mft = mftno; - error = ntfs_read_mft_record(ino->vol, mftno, mft); - if (error) { - if (error == -EINVAL && !tries) - goto force_load_mft_data; -failed_reading_mft_data: - ntfs_debug(DEBUG_FILE3, "parse_attributes: " - "ntfs_read_mft_record(mftno = 0x%x) " - "failed\n", mftno); - ntfs_free(mft); - return error; - } - } - attr = ntfs_find_attr_in_mft_rec( - ino->vol, /* ntfs volume */ - mftno == ino->i_number ?/* mft record is: */ - ino->attr: /* base record */ - mft, /* extension record */ - NTFS_GETU32(alist + 0), /* type */ - (wchar_t*)(alist + alist[7]), /* name */ - alist[6], /* name length */ - 1, /* ignore case */ - NTFS_GETU16(alist + 24) /* instance number */ - ); - if (!attr) { - ntfs_error("parse_attributes: mft records 0x%x and/or " - "0x%x corrupt!\n", ino->i_number, mftno); - ntfs_free(mft); - return -EINVAL; /* FIXME: Better error code? 
(AIA) */ - } - error = ntfs_insert_mft_attribute(ino, mftno, attr); - if (error) { - ntfs_debug(DEBUG_FILE3, "parse_attributes: " - "ntfs_insert_mft_attribute(mftno 0x%x, " - "attribute type 0x%x) failed\n", mftno, - NTFS_GETU32(alist + 0)); - ntfs_free(mft); - return error; - } - len -= l; - alist += l; - } - ntfs_free(mft); - *plen = len; - return 0; -force_load_mft_data: +typedef int (*test_t)(struct inode *, unsigned long, void *); +static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi); + +/** + * ntfs_iget - obtain a struct inode corresponding to a specific normal inode + * @sb: super block of mounted volume + * @mft_no: mft record number / inode number to obtain + * + * Obtain the struct inode corresponding to a specific normal inode (i.e. a + * file or directory). + * + * If the inode is in the cache, it is just returned with an increased + * reference count. Otherwise, a new struct inode is allocated and initialized, + * and finally ntfs_read_locked_inode() is called to read in the inode and + * fill in the remainder of the inode structure. + * + * Return the struct inode on success. Check the return value with IS_ERR() and + * if true, the function failed and the error code is obtained from PTR_ERR(). 
+ */ +struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no) { - ntfs_u8 *mft2, *attr2; - int mftno2; - int last_mft2 = last_mft; - int len2 = len; - int error2; - int found2 = 0; - ntfs_u8 *alist2 = alist; + struct inode *vi; + ntfs_inode *ni; + ntfs_volume *vol; + ntfs_attr na; + + na.mft_no = mft_no; + na.type = AT_UNUSED; + na.name = NULL; + na.name_len = 0; + + vol = NTFS_SB(sb); + + down(&vol->inode_lock); + vi = iget4(sb, mft_no, (test_t)ntfs_test_inode, &na); + if (!vi) { + up(&vol->inode_lock); + return ERR_PTR(-ENOMEM); + } /* - * We only get here if $DATA wasn't found in $MFT which only happens - * on volume mount when $MFT has an attribute list and there are - * attributes before $DATA which are inside extent mft records. So - * we just skip forward to the $DATA attribute and read that. Then we - * restart which is safe as an attribute will not be inserted twice. - * - * This still will not fix the case where the attribute list is non- - * resident, larger than 1024 bytes, and the $DATA attribute list entry - * is not in the first 1024 bytes. FIXME: This should be implemented - * somehow! Perhaps by passing special error code up to - * ntfs_load_attributes() so it keeps going trying to get to $DATA - * regardless. Then it would have to restart just like we do here. + * vol->inode_lock has been released in ntfs_read_locked_inode() which + * is called by iget4() if the inode is new, otherwise we release it. */ - mft2 = ntfs_malloc(ino->vol->mft_record_size); - if (!mft2) { - ntfs_free(mft); - return -ENOMEM; - } - ntfs_memcpy(mft2, mft, ino->vol->mft_record_size); - while (len2 > 8) { - l = NTFS_GETU16(alist2 + 4); - if (l > len2) - break; - if (NTFS_GETU32(alist2 + 0x0) < ino->vol->at_data) { - len2 -= l; - alist2 += l; - continue; - } - if (NTFS_GETU32(alist2 + 0x0) > ino->vol->at_data) { - if (found2) - break; - /* Uh-oh! It really isn't there! 
*/ - ntfs_error("Either the $MFT is corrupt or, equally " - "likely, the $MFT is too complex for " - "the current driver to handle. Please " - "email the ntfs maintainer that you " - "saw this message. Thank you.\n"); - goto failed_reading_mft_data; - } - /* Process attribute description. */ - mftno2 = NTFS_GETU32(alist2 + 0x10); - if (mftno2 != ino->i_number && mftno2 != last_mft2) { - last_mft2 = mftno2; - error2 = ntfs_read_mft_record(ino->vol, mftno2, mft2); - if (error2) { - ntfs_debug(DEBUG_FILE3, "parse_attributes: " - "ntfs_read_mft_record(mftno2 = 0x%x) " - "failed\n", mftno2); - ntfs_free(mft2); - goto failed_reading_mft_data; - } - } - attr2 = ntfs_find_attr_in_mft_rec( - ino->vol, /* ntfs volume */ - mftno2 == ino->i_number ?/* mft record is: */ - ino->attr: /* base record */ - mft2, /* extension record */ - NTFS_GETU32(alist2 + 0), /* type */ - (wchar_t*)(alist2 + alist2[7]), /* name */ - alist2[6], /* name length */ - 1, /* ignore case */ - NTFS_GETU16(alist2 + 24) /* instance number */ - ); - if (!attr2) { - ntfs_error("parse_attributes: mft records 0x%x and/or " - "0x%x corrupt!\n", ino->i_number, - mftno2); - ntfs_free(mft2); - goto failed_reading_mft_data; - } - error2 = ntfs_insert_mft_attribute(ino, mftno2, attr2); - if (error2) { - ntfs_debug(DEBUG_FILE3, "parse_attributes: " - "ntfs_insert_mft_attribute(mftno2 0x%x, " - "attribute2 type 0x%x) failed\n", mftno2, - NTFS_GETU32(alist2 + 0)); - ntfs_free(mft2); - goto failed_reading_mft_data; - } - len2 -= l; - alist2 += l; - found2 = 1; - } - ntfs_free(mft2); - tries = 1; - goto continue_after_loading_mft_data; -} -} - -static void ntfs_load_attributes(ntfs_inode *ino) -{ - ntfs_attribute *alist; - int datasize; - int offset, len, delta; - char *buf; - ntfs_volume *vol = ino->vol; - - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 1\n", ino->i_number); - if (ntfs_insert_mft_attributes(ino, ino->attr, ino->i_number)) - return; - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 2\n", ino->i_number); - 
alist = ntfs_find_attr(ino, vol->at_attribute_list, 0); - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 3\n", ino->i_number); - if (!alist) - return; - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 4\n", ino->i_number); - datasize = alist->size; - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: alist->size = 0x%x\n", - ino->i_number, alist->size); - if (alist->resident) { - parse_attributes(ino, alist->d.data, &datasize); - return; - } - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 5\n", ino->i_number); - buf = ntfs_malloc(1024); - if (!buf) /* FIXME: Should be passing error code to caller. (AIA) */ - return; - delta = 0; - for (offset = 0; datasize; datasize -= len, offset += len) { - ntfs_io io; - - io.fn_put = ntfs_put; - io.fn_get = 0; - io.param = buf + delta; - len = 1024 - delta; - if (len > datasize) - len = datasize; - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: len = %i\n", - ino->i_number, len); - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: delta = %i\n", - ino->i_number, delta); - io.size = len; - if (ntfs_read_attr(ino, vol->at_attribute_list, 0, offset, - &io)) - ntfs_error("error in load_attributes\n"); - delta += len; - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: after += len, " - "delta = %i\n", ino->i_number, delta); - parse_attributes(ino, buf, &delta); - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x: after " - "parse_attr, delta = %i\n", ino->i_number, - delta); - if (delta) - /* Move remaining bytes to buffer start. 
*/ - ntfs_memmove(buf, buf + len - delta, delta); - } - ntfs_debug(DEBUG_FILE2, "load_attributes 0x%x 6\n", ino->i_number); - ntfs_free(buf); -} - -int ntfs_init_inode(ntfs_inode *ino, ntfs_volume *vol, int inum) -{ - char *buf; - int error; - - ntfs_debug(DEBUG_FILE1, "Initializing inode 0x%x\n", inum); - ino->i_number = inum; - ino->vol = vol; - ino->attr = buf = ntfs_malloc(vol->mft_record_size); - if (!buf) - return -ENOMEM; - error = ntfs_read_mft_record(vol, inum, ino->attr); - if (error) { - ntfs_debug(DEBUG_OTHER, "Init inode: 0x%x failed\n", inum); - return error; - } - ntfs_debug(DEBUG_FILE2, "Init inode: got mft 0x%x\n", inum); - ino->sequence_number = NTFS_GETU16(buf + 0x10); - ino->attr_count = 0; - ino->record_count = 0; - ino->records = 0; - ino->attrs = 0; - ntfs_load_attributes(ino); - ntfs_debug(DEBUG_FILE2, "Init inode: done 0x%x\n", inum); - return 0; + ni = NTFS_I(vi); + if (!test_and_clear_bit(NI_New, &ni->state)) + up(&vol->inode_lock); + return vi; } -void ntfs_clear_inode(ntfs_inode *ino) +/** + * ntfs_attr_iget - obtain a struct inode corresponding to an attribute + * @base_vi: vfs base inode containing the attribute + * @type: attribute type + * @name: Unicode name of the attribute (NULL if unnamed) + * @name_len: length of @name in Unicode characters (0 if unnamed) + * + * Obtain the (fake) struct inode corresponding to the attribute specified by + * @type, @name, and @name_len, which is present in the base mft record + * specified by the vfs inode @base_vi. + * + * If the attribute inode is in the cache, it is just returned with an + * increased reference count. Otherwise, a new struct inode is allocated and + * initialized, and finally ntfs_read_locked_attr_inode() is called to read the + * attribute and fill in the inode structure. + * + * Return the struct inode of the attribute inode on success. Check the return + * value with IS_ERR() and if true, the function failed and the error code is + * obtained from PTR_ERR(). 
+ */ +struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPES type, + uchar_t *name, u32 name_len) { - int i; - if (!ino->attr) { - ntfs_error("ntfs_clear_inode: double free\n"); - return; - } - ntfs_free(ino->attr); - ino->attr = 0; - ntfs_free(ino->records); - ino->records = 0; - for (i = 0; i < ino->attr_count; i++) { - if (ino->attrs[i].name) - ntfs_free(ino->attrs[i].name); - if (ino->attrs[i].resident) { - if (ino->attrs[i].d.data) - ntfs_free(ino->attrs[i].d.data); - } else { - if (ino->attrs[i].d.r.runlist) - ntfs_vfree(ino->attrs[i].d.r.runlist); - } + struct inode *vi; + ntfs_inode *ni; + ntfs_volume *vol; + ntfs_attr na; + + na.mft_no = base_vi->i_ino; + na.type = type; + na.name = name; + na.name_len = name_len; + na.base_vi = base_vi; + + vol = NTFS_SB(base_vi->i_sb); + + down(&vol->inode_lock); + vi = iget4(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, &na); + if (!vi) { + up(&vol->inode_lock); + return ERR_PTR(-ENOMEM); } - ntfs_free(ino->attrs); - ino->attrs = 0; + /* + * vol->inode_lock has been released in ntfs_read_locked_inode() which + * is called by iget4() if the inode is new, otherwise we release it. + */ + ni = NTFS_I(vi); + if (!test_and_clear_bit(NI_New, &ni->state)) + up(&vol->inode_lock); + /* + * There is no point in keeping bad attribute inodes around. This also + * simplifies things in that we never need to check for bad attribute + * inodes elsewhere. + */ + if (is_bad_inode(vi)) { + iput(vi); + vi = ERR_PTR(-EIO); + } + return vi; } -/* Check and fixup a MFT record. 
*/ -int ntfs_check_mft_record(ntfs_volume *vol, char *record) +static inline ntfs_inode *ntfs_alloc_extent_inode(void) { - return ntfs_fixup_record(record, "FILE", vol->mft_record_size); + ntfs_inode *ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, + SLAB_NOFS); + ntfs_debug("Entering."); + if (likely(ni != NULL)) { + ni->state = 0; + return ni; + } + ntfs_error(NULL, "Allocation of NTFS inode structure failed."); + return NULL; } -/* Return (in result) the value indicating the next available attribute - * chunk number. Works for inodes w/o extension records only. */ -int ntfs_allocate_attr_number(ntfs_inode *ino, int *result) +void ntfs_destroy_extent_inode(ntfs_inode *ni) { - if (ino->record_count != 1) - return -EOPNOTSUPP; - *result = NTFS_GETU16(ino->attr + 0x28); - NTFS_PUTU16(ino->attr + 0x28, (*result) + 1); - return 0; -} - -/* Find the location of an attribute in the inode. A name of NULL indicates - * unnamed attributes. Return pointer to attribute or NULL if not found. */ -char *ntfs_get_attr(ntfs_inode *ino, int attr, char *name) -{ - /* Location of first attribute. */ - char *it = ino->attr + NTFS_GETU16(ino->attr + 0x14); - int type; - int len; - - /* Only check for magic DWORD here, fixup should have happened before.*/ - if (!IS_MFT_RECORD(ino->attr)) - return 0; - do { - type = NTFS_GETU32(it); - len = NTFS_GETU16(it + 4); - /* We found the attribute type. Is the name correct, too? */ - if (type == attr) { - int namelen = NTFS_GETU8(it + 9); - char *name_it, *n = name; - /* Match given name and attribute name if present. - Make sure attribute name is Unicode. */ - if (!name) { - goto check_namelen; - } else if (namelen) { - for (name_it = it + NTFS_GETU16(it + 10); - namelen; n++, name_it += 2, namelen--) - if (*name_it != *n || name_it[1]) - break; -check_namelen: - if (!namelen) - break; - } - } - it += len; - } while (type != -1); /* List of attributes ends with type -1. 
- if (type == -1) - return 0; - return it; + ntfs_debug("Entering."); + BUG_ON(ni->page); + if (!atomic_dec_and_test(&ni->count)) + BUG(); + kmem_cache_free(ntfs_inode_cache, ni); } -__s64 ntfs_get_attr_size(ntfs_inode *ino, int type, char *name) -{ - ntfs_attribute *attr = ntfs_find_attr(ino, type, name); - if (!attr) - return 0; - return - attr->size; -} - -int ntfs_attr_is_resident(ntfs_inode *ino, int type, char *name) +/** + * __ntfs_init_inode - initialize ntfs specific part of an inode + * @sb: super block of mounted volume + * @ni: freshly allocated ntfs inode which to initialize + * + * Initialize an ntfs inode to defaults. + * + * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left + * untouched. Make sure to initialize them elsewhere. + * + * The function is void: it returns no value and reports no error. + */ +static void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) { - ntfs_attribute *attr = ntfs_find_attr(ino, type, name); - if (!attr) - return 0; - return attr->resident; + ntfs_debug("Entering."); + ni->initialized_size = ni->allocated_size = 0; + ni->seq_no = 0; + atomic_set(&ni->count, 1); + ni->vol = NTFS_SB(sb); + init_run_list(&ni->run_list); + init_MUTEX(&ni->mrec_lock); + ni->page = NULL; + ni->page_ofs = 0; + ni->attr_list_size = 0; + ni->attr_list = NULL; + init_run_list(&ni->attr_list_rl); + ni->itype.index.bmp_ino = NULL; + ni->itype.index.block_size = 0; + ni->itype.index.vcn_size = 0; + ni->itype.index.block_size_bits = 0; + ni->itype.index.vcn_size_bits = 0; + init_MUTEX(&ni->extent_lock); + ni->nr_extents = 0; + ni->ext.base_ntfs_ino = NULL; + return; +} + +static inline void ntfs_init_big_inode(struct inode *vi) +{ + ntfs_inode *ni = NTFS_I(vi); + + ntfs_debug("Entering."); + __ntfs_init_inode(vi->i_sb, ni); + ni->mft_no = vi->i_ino; + return; +} + +inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, + unsigned long mft_no) +{ + ntfs_inode *ni = ntfs_alloc_extent_inode(); + + ntfs_debug("Entering."); +
if (likely(ni != NULL)) { + __ntfs_init_inode(sb, ni); + ni->mft_no = mft_no; + ni->type = AT_UNUSED; + ni->name = NULL; + ni->name_len = 0; + } + return ni; } - -/* - * A run is coded as a type indicator, an unsigned length, and a signed cluster - * offset. - * . To save space, length and offset are fields of variable length. The low - * nibble of the type indicates the width of the length :), the high nibble - * the width of the offset. - * . The first offset is relative to cluster 0, later offsets are relative to - * the previous cluster. + +/** + * ntfs_is_extended_system_file - check if a file is in the $Extend directory + * @ctx: initialized attribute search context * - * This function decodes a run. Length is an output parameter, data and cluster - * are in/out parameters. + * Search all file name attributes in the inode described by the attribute + * search context @ctx and check if any of the names are in the $Extend system + * directory. + * + * Return values: + * 1: file is in $Extend directory + * 0: file is not in $Extend directory + * -EIO: file is corrupt */ -int ntfs_decompress_run(unsigned char **data, int *length, - ntfs_cluster_t *cluster, int *ctype) +static int ntfs_is_extended_system_file(attr_search_context *ctx) { - unsigned char type = *(*data)++; - *ctype = 0; - switch (type & 0xF) { - case 1: - *length = NTFS_GETS8(*data); - break; - case 2: - *length = NTFS_GETS16(*data); - break; - case 3: - *length = NTFS_GETS24(*data); - break; - case 4: - *length = NTFS_GETS32(*data); - break; - /* Note: cases 5-8 are probably pointless to code, since how - * many runs > 4GB of length are there? At the most, cases 5 - * and 6 are probably necessary, and would also require making - * length 64-bit throughout. 
*/ - default: - ntfs_error("Can't decode run type field 0x%x\n", type); - return -1; - } -// ntfs_debug(DEBUG_FILE3, "ntfs_decompress_run: length = 0x%x\n",*length); - if (*length < 0) - { - ntfs_error("Negative run length decoded\n"); - return -1; - } - *data += (type & 0xF); - switch (type & 0xF0) { - case 0: - *ctype = 2; - break; - case 0x10: - *cluster += NTFS_GETS8(*data); - break; - case 0x20: - *cluster += NTFS_GETS16(*data); - break; - case 0x30: - *cluster += NTFS_GETS24(*data); - break; - case 0x40: - *cluster += NTFS_GETS32(*data); - break; -#if 0 /* Keep for future, in case ntfs_cluster_t ever becomes 64bit. */ - case 0x50: - *cluster += NTFS_GETS40(*data); - break; - case 0x60: - *cluster += NTFS_GETS48(*data); - break; - case 0x70: - *cluster += NTFS_GETS56(*data); - break; - case 0x80: - *cluster += NTFS_GETS64(*data); - break; -#endif - default: - ntfs_error("Can't decode run type field 0x%x\n", type); - return -1; - } -// ntfs_debug(DEBUG_FILE3, "ntfs_decompress_run: cluster = 0x%x\n", -// *cluster); - *data += (type >> 4); - return 0; -} + int nr_links; -static void dump_runlist(const ntfs_runlist *rl, const int rlen); + /* Restart search. */ + reinit_attr_search_ctx(ctx); -/* - * FIXME: ntfs_readwrite_attr() has the effect of writing @dest to @offset of - * the attribute value of the attribute @attr in the in memory inode @ino. - * If the attribute value of @attr is non-resident the value's contents at - * @offset are actually written to disk (from @dest). The on disk mft record - * describing the non-resident attribute value is not updated! - * If the attribute value is resident then the value is written only in - * memory. The on disk mft record containing the value is not written to disk. - * A possible fix would be to call ntfs_update_inode() before returning. (AIA) - */ -/* Reads l bytes of the attribute (attr, name) of ino starting at offset on - * vol into buf. Returns the number of bytes read in the ntfs_io struct. 
- * Returns 0 on success, errno on failure */ -int ntfs_readwrite_attr(ntfs_inode *ino, ntfs_attribute *attr, __s64 offset, - ntfs_io *dest) -{ - int rnum, s_vcn, error, clustersizebits; - ntfs_cluster_t cluster, s_cluster, vcn, len; - __s64 l, chunk, copied; - - ntfs_debug(DEBUG_FILE3, "%s(): %s 0x%x bytes at offset " - "0x%Lx %s inode 0x%x, attr type 0x%x.\n", __FUNCTION__, - dest->do_read ? "Read" : "Write", dest->size, offset, - dest->do_read ? "from" : "to", ino->i_number, - attr->type); - l = dest->size; - if (l == 0) - return 0; - if (dest->do_read) { - /* If read _starts_ beyond end of stream, return nothing. */ - if (offset >= attr->size) { - dest->size = 0; - return 0; - } - /* If read _extends_ beyond end of stream, return as much - * initialised data as we have. */ - if (offset + l >= attr->size) - l = dest->size = attr->size - offset; - } else { + /* Get number of hard links. */ + nr_links = le16_to_cpu(ctx->mrec->link_count); + + /* Loop through all hard links. */ + while (lookup_attr(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx)) { + FILE_NAME_ATTR *file_name_attr; + ATTR_RECORD *attr = ctx->attr; + u8 *p, *p2; + + nr_links--; /* - * If write extends beyond _allocated_ size, extend attribute, - * updating attr->allocated and attr->size in the process. (AIA) + * Maximum sanity checking as we are called on an inode that + * we suspect might be corrupt. */ - if ((!attr->resident && offset + l > attr->allocated) || - (attr->resident && offset + l > attr->size)) { - error = ntfs_resize_attr(ino, attr, offset + l); - if (error) - return error; - } - if (!attr->resident) { - /* Has amount of data increased? */ - if (offset + l > attr->size) - attr->size = offset + l; - /* Has amount of initialised data increased? */ - if (offset + l > attr->initialized) { - /* FIXME: Clear the section between the old - * initialised length and the write start. 
- * (AIA) */ - attr->initialized = offset + l; - } + p = (u8*)attr + le32_to_cpu(attr->length); + if (p < (u8*)ctx->mrec || (u8*)p > (u8*)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_in_use)) { +err_corrupt_attr: + ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name " + "attribute. You should run chkdsk."); + return -EIO; } - } - if (attr->resident) { - if (dest->do_read) - dest->fn_put(dest, (ntfs_u8*)attr->d.data + offset, l); - else - dest->fn_get((ntfs_u8*)attr->d.data + offset, dest, l); - dest->size = l; - return 0; - } - if (dest->do_read) { - /* Read uninitialized data. */ - if (offset >= attr->initialized) - return ntfs_read_zero(dest, l); - if (offset + l > attr->initialized) { - dest->size = chunk = attr->initialized - offset; - error = ntfs_readwrite_attr(ino, attr, offset, dest); - if (error || (dest->size != chunk && (error = -EIO, 1))) - return error; - dest->size += l - chunk; - return ntfs_read_zero(dest, l - chunk); + if (attr->non_resident) { + ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file " + "name. You should run chkdsk."); + return -EIO; } - if (attr->flags & ATTR_IS_COMPRESSED) - return ntfs_read_compressed(ino, attr, offset, dest); - } else { - if (attr->flags & ATTR_IS_COMPRESSED) - return ntfs_write_compressed(ino, attr, offset, dest); - } - vcn = 0; - clustersizebits = ino->vol->cluster_size_bits; - s_vcn = offset >> clustersizebits; - for (rnum = 0; rnum < attr->d.r.len && - vcn + attr->d.r.runlist[rnum].len <= s_vcn; rnum++) - vcn += attr->d.r.runlist[rnum].len; - if (rnum == attr->d.r.len) { - ntfs_debug(DEBUG_FILE3, "%s(): EOPNOTSUPP: " - "inode = 0x%x, rnum = %i, offset = 0x%Lx, vcn = 0x%x, " - "s_vcn = 0x%x.\n", __FUNCTION__, ino->i_number, rnum, - offset, vcn, s_vcn); - dump_runlist(attr->d.r.runlist, attr->d.r.len); - /*FIXME: Should extend runlist. 
*/ - return -EOPNOTSUPP; - } - copied = 0; - while (l) { - s_vcn = offset >> clustersizebits; - cluster = attr->d.r.runlist[rnum].lcn; - len = attr->d.r.runlist[rnum].len; - s_cluster = cluster + s_vcn - vcn; - chunk = ((__s64)(vcn + len) << clustersizebits) - offset; - if (chunk > l) - chunk = l; - dest->size = chunk; - error = ntfs_getput_clusters(ino->vol, s_cluster, offset - - ((__s64)s_vcn << clustersizebits), dest); - if (error) { - ntfs_error("Read/write error.\n"); - dest->size = copied; - return error; - } - l -= chunk; - copied += chunk; - offset += chunk; - if (l && offset >= ((__s64)(vcn + len) << clustersizebits)) { - rnum++; - vcn += len; - cluster = attr->d.r.runlist[rnum].lcn; - len = attr->d.r.runlist[rnum].len; + if (attr->flags) { + ntfs_error(ctx->ntfs_ino->vol->sb, "File name with " + "invalid flags. You should run " + "chkdsk."); + return -EIO; + } + if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) { + ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file " + "name. You should run chkdsk."); + return -EIO; } + file_name_attr = (FILE_NAME_ATTR*)((u8*)attr + + le16_to_cpu(attr->data.resident.value_offset)); + p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length); + if (p2 < (u8*)attr || p2 > p) + goto err_corrupt_attr; + /* This attribute is ok, but is it in the $Extend directory? */ + if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend) + return 1; /* YES, it's an extended system file. */ + } + if (nr_links) { + ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count " + "doesn't match number of name attributes. You " + "should run chkdsk."); + return -EIO; } - dest->size = copied; - return 0; + return 0; /* NO, it is not an extended system file. 
*/ } -int ntfs_read_attr(ntfs_inode *ino, int type, char *name, __s64 offset, - ntfs_io *buf) +/** + * ntfs_read_locked_inode - read an inode from its device + * @vi: inode to read + * + * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode + * described by @vi into memory from the device. + * + * The only fields in @vi that we need to/can look at when the function is + * called are i_sb, pointing to the mounted device's super block, and i_ino, + * the number of the inode to load. If this is a fake inode, i.e. NInoAttr(), + * then the fields type, name, and name_len are also valid, and describe the + * attribute which this fake inode represents. + * + * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino + * for reading and sets up the necessary @vi fields as well as initializing + * the ntfs inode. + * + * Q: What locks are held when the function is called? + * A: i_state has I_LOCK set, hence the inode is locked, also + * i_count is set to 1, so it is not going to go away + * i_flags is set to 0 and we have no business touching it. Only an ioctl() + * is allowed to write to them. We should of course be honouring them but + * we need to do that using the IS_* macros defined in include/linux/fs.h. + * In any case ntfs_read_locked_inode() has nothing to do with i_flags. + * + * No value is returned (void). In the error case, the inode will + * have had make_bad_inode() executed on it.
+ */ +void ntfs_read_locked_inode(struct inode *vi, ntfs_attr *na) { - ntfs_attribute *attr; + ntfs_volume *vol = NTFS_SB(vi->i_sb); + ntfs_inode *ni; + MFT_RECORD *m; + STANDARD_INFORMATION *si; + attr_search_context *ctx; + int err = 0; - buf->do_read = 1; - attr = ntfs_find_attr(ino, type, name); - if (!attr) { - ntfs_debug(DEBUG_FILE3, "%s(): attr 0x%x not found in inode " - "0x%x\n", __FUNCTION__, type, ino->i_number); - return -EINVAL; + ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); + + ni = NTFS_I(vi); + /* + * Initialize the ntfs specific part of @vi special casing + * FILE_MFT/unnamed $DATA attribute, which we need to do at mount time. + */ + if (likely(vi->i_ino || na->type != AT_DATA || na->name_len)) { + ni->state = 0; + err = ntfs_init_locked_inode(vi, na); + set_bit(NI_New, &ni->state); + up(&vol->inode_lock); + if (err) + goto err_out; + /* If this is an attribute inode, deal with it elsewhere. */ + if (na->type != AT_UNUSED) { + ntfs_read_locked_attr_inode(na->base_vi, vi); + return; + } + ntfs_init_big_inode(vi); } - return ntfs_readwrite_attr(ino, attr, offset, buf); -} -int ntfs_write_attr(ntfs_inode *ino, int type, char *name, __s64 offset, - ntfs_io *buf) -{ - ntfs_attribute *attr; - - buf->do_read = 0; - attr = ntfs_find_attr(ino, type, name); - if (!attr) { - ntfs_debug(DEBUG_FILE3, "%s(): attr 0x%x not found in inode " - "0x%x\n", __FUNCTION__, type, ino->i_number); - return -EINVAL; - } - return ntfs_readwrite_attr(ino, attr, offset, buf); -} - -/* -2 = error, -1 = hole, >= 0 means real disk cluster (lcn). 
*/ -int ntfs_vcn_to_lcn(ntfs_inode *ino, int vcn) -{ - int rnum; - ntfs_attribute *data; - - data = ntfs_find_attr(ino, ino->vol->at_data, 0); - if (!data || data->resident || data->flags & (ATTR_IS_COMPRESSED | - ATTR_IS_ENCRYPTED)) - return -2; - if (data->size <= (__s64)vcn << ino->vol->cluster_size_bits) - return -2; - if (data->initialized <= (__s64)vcn << ino->vol->cluster_size_bits) - return -1; - for (rnum = 0; rnum < data->d.r.len && - vcn >= data->d.r.runlist[rnum].len; rnum++) - vcn -= data->d.r.runlist[rnum].len; - if (data->d.r.runlist[rnum].lcn >= 0) - return data->d.r.runlist[rnum].lcn + vcn; - return data->d.r.runlist[rnum].lcn + vcn; -} + /* Setup the generic vfs inode parts now. */ -static int allocate_store(ntfs_volume *vol, ntfs_disk_inode *store, int count) -{ - int i; - - if (store->count > count) - return 0; - if (store->size < count) { - ntfs_mft_record *n = ntfs_malloc((count + 4) * - sizeof(ntfs_mft_record)); - if (!n) - return -ENOMEM; - if (store->size) { - for (i = 0; i < store->size; i++) - n[i] = store->records[i]; - ntfs_free(store->records); - } - store->size = count + 4; - store->records = n; - } - for (i = store->count; i < count; i++) { - store->records[i].record = ntfs_malloc(vol->mft_record_size); - if (!store->records[i].record) - return -ENOMEM; - store->count++; + /* This is the optimal IO size (for stat), not the fs block size. */ + vi->i_blksize = PAGE_CACHE_SIZE; + /* + * This is for checking whether an inode has changed w.r.t. a file so + * that the file can be updated if necessary (compare with f_version). 
+ */ + vi->i_version = ++event; + + vi->i_uid = vol->uid; + vi->i_gid = vol->gid; + vi->i_mode = 0; + + m = map_mft_record(ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + goto err_out; + } + ctx = get_attr_search_ctx(ni, m); + if (!ctx) { + err = -ENOMEM; + goto unm_err_out; } - return 0; -} -static void deallocate_store(ntfs_disk_inode* store) -{ - int i; - - for (i = 0; i < store->count; i++) - ntfs_free(store->records[i].record); - ntfs_free(store->records); - store->count = store->size = 0; - store->records = 0; -} + if (!(m->flags & MFT_RECORD_IN_USE)) { + ntfs_error(vi->i_sb, "Inode is not in use! You should " + "run chkdsk."); + goto unm_err_out; + } + if (m->base_mft_record) { + ntfs_error(vi->i_sb, "Inode is an extent inode! You should " + "run chkdsk."); + goto unm_err_out; + } -/** - * layout_runs - compress runlist into mapping pairs array - * @attr: attribute containing the runlist to compress - * @rec: destination buffer to hold the mapping pairs array - * @offs: current position in @rec (in/out variable) - * @size: size of the buffer @rec - * - * layout_runs walks the runlist in @attr, compresses it and writes it out the - * resulting mapping pairs array into @rec (up to a maximum of @size bytes are - * written). On entry @offs is the offset in @rec at which to begin writing the - * mapping pairs array. On exit, it contains the offset in @rec of the first - * byte after the end of the mapping pairs array. - */ -static int layout_runs(ntfs_attribute *attr, char *rec, int *offs, int size) -{ - int i, len, offset, coffs; - /* ntfs_cluster_t MUST be signed! (AIA) */ - ntfs_cluster_t cluster, rclus; - ntfs_runlist *rl = attr->d.r.runlist; - cluster = 0; - offset = *offs; - for (i = 0; i < attr->d.r.len; i++) { + /* Transfer information from mft record into vfs and ntfs inodes. 
*/ + ni->seq_no = le16_to_cpu(m->sequence_number); + + /* + * FIXME: Keep in mind that link_count is two for files which have both + * a long file name and a short file name as separate entries, so if + * we are hiding short file names this will be too high. Either we need + * to account for the short file names by subtracting them or we need + * to make sure we delete files even though i_nlink is not zero which + * might be tricky due to vfs interactions. Need to think about this + * some more when implementing the unlink command. + */ + vi->i_nlink = le16_to_cpu(m->link_count); + /* + * FIXME: Reparse points can have the directory bit set even though + * they would be S_IFLNK. Need to deal with this further below when we + * implement reparse points / symbolic links but it will do for now. + * Also if not a directory, it could be something else, rather than + * a regular file. But again, will do for now. + */ + if (m->flags & MFT_RECORD_IS_DIRECTORY) { + vi->i_mode |= S_IFDIR; + /* Things break without this kludge! */ + if (vi->i_nlink > 1) + vi->i_nlink = 1; + } else + vi->i_mode |= S_IFREG; + + /* + * Find the standard information attribute in the mft record. At this + * stage we haven't setup the attribute list stuff yet, so this could + * in fact fail if the standard information is in an extent record, but + * I don't think this actually ever happens. + */ + if (!lookup_attr(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0, + ctx)) { /* - * We cheat with this check on the basis that lcn will never - * be less than -1 and the lcn delta will fit in signed - * 32-bits (ntfs_cluster_t). (AIA) + * TODO: We should be performing a hot fix here (if the recover + * mount option is set) by creating a new attribute. 
*/ - if (rl[i].lcn < (ntfs_cluster_t)-1) { - ntfs_error("layout_runs() encountered an out of bounds " - "cluster delta, lcn = %i.\n", - rl[i].lcn); - return -ERANGE; - } - rclus = rl[i].lcn - cluster; - len = rl[i].len; - rec[offset] = 0; - if (offset + 9 > size) - return -E2BIG; /* It might still fit, but this - * simplifies testing. */ + ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute is " + "missing."); + goto unm_err_out; + } + /* Get the standard information attribute value. */ + si = (STANDARD_INFORMATION*)((char*)ctx->attr + + le16_to_cpu(ctx->attr->data.resident.value_offset)); + + /* Transfer information from the standard information into vfs_ino. */ + /* + * Note: The i_?times do not quite map perfectly onto the NTFS times, + * but they are close enough, and in the end it doesn't really matter + * that much... + */ + /* + * mtime is the last change of the data within the file. Not changed + * when only metadata is changed, e.g. a rename doesn't affect mtime. + */ + vi->i_mtime = ntfs2utc(si->last_data_change_time); + /* + * ctime is the last change of the metadata of the file. This obviously + * always changes, when mtime is changed. ctime can be changed on its + * own, mtime is then not changed, e.g. when a file is renamed. + */ + vi->i_ctime = ntfs2utc(si->last_mft_change_time); + /* + * Last access to the data within the file. Not changed during a rename + * for example but changed whenever the file is written to. + */ + vi->i_atime = ntfs2utc(si->last_access_time); + + /* Find the attribute list attribute if present. 
*/ + reinit_attr_search_ctx(ctx); + if (lookup_attr(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx)) { + if (vi->i_ino == FILE_MFT) + goto skip_attr_list_load; + ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino); + NInoSetAttrList(ni); + if (ctx->attr->flags & ATTR_IS_ENCRYPTED || + ctx->attr->flags & ATTR_COMPRESSION_MASK || + ctx->attr->flags & ATTR_IS_SPARSE) { + ntfs_error(vi->i_sb, "Attribute list attribute is " + "compressed/encrypted/sparse. Not " + "allowed. Corrupt inode. You should " + "run chkdsk."); + goto unm_err_out; + } + /* Now allocate memory for the attribute list. */ + ni->attr_list_size = (u32)attribute_value_length(ctx->attr); + ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); + if (!ni->attr_list) { + ntfs_error(vi->i_sb, "Not enough memory to allocate " + "buffer for attribute list."); + err = -ENOMEM; + goto unm_err_out; + } + if (ctx->attr->non_resident) { + NInoSetAttrListNonResident(ni); + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(vi->i_sb, "Attribute list has non " + "zero lowest_vcn. Inode is " + "corrupt. You should run " + "chkdsk."); + goto unm_err_out; + } + /* + * Setup the run list. No need for locking as we have + * exclusive access to the inode at this time. + */ + ni->attr_list_rl.rl = decompress_mapping_pairs(vol, + ctx->attr, NULL); + if (IS_ERR(ni->attr_list_rl.rl)) { + err = PTR_ERR(ni->attr_list_rl.rl); + ni->attr_list_rl.rl = NULL; + ntfs_error(vi->i_sb, "Mapping pairs " + "decompression failed with " + "error code %i. Corrupt " + "attribute list in inode.", + -err); + goto unm_err_out; + } + /* Now load the attribute list. */ + if ((err = load_attribute_list(vol, &ni->attr_list_rl, + ni->attr_list, ni->attr_list_size, + sle64_to_cpu(ctx->attr->data. 
+ non_resident.initialized_size)))) { + ntfs_error(vi->i_sb, "Failed to load " + "attribute list attribute."); + goto unm_err_out; + } + } else /* if (!ctx.attr->non_resident) */ { + if ((u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset) + + le32_to_cpu( + ctx->attr->data.resident.value_length) > + (u8*)ctx->mrec + vol->mft_record_size) { + ntfs_error(vi->i_sb, "Corrupt attribute list " + "in inode."); + goto unm_err_out; + } + /* Now copy the attribute list. */ + memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset), + le32_to_cpu( + ctx->attr->data.resident.value_length)); + } + } +skip_attr_list_load: + /* + * If an attribute list is present we now have the attribute list value + * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes. + */ + if (S_ISDIR(vi->i_mode)) { + struct inode *bvi; + ntfs_inode *bni; + INDEX_ROOT *ir; + char *ir_end, *index_end; + + /* It is a directory, find index root attribute. */ + reinit_attr_search_ctx(ctx); + if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, + NULL, 0, ctx)) { + // FIXME: File is corrupt! Hot-fix with empty index + // root attribute if recovery option is set. + ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " + "missing."); + goto unm_err_out; + } + /* Set up the state. */ + if (ctx->attr->non_resident) { + ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " + "not resident. Not allowed."); + goto unm_err_out; + } /* - * Run length is stored as signed number, so deal with it - * properly, i.e. observe that a negative number will have all - * its most significant bits set to 1 but we don't store that - * in the mapping pairs array. We store the smallest type of - * negative number required, thus in the first if we check - * whether len fits inside a signed byte and if so we store it - * as such, the next ifs check for a signed short, then a signed - * 24-bit and finally the full blown signed 32-bit. Same goes - * for rlus below. 
(AIA) + * Compressed/encrypted index root just means that the newly + * created files in that directory should be created compressed/ + * encrypted. However index root cannot be both compressed and + * encrypted. */ - if (len >= -0x80 && len <= 0x7f) { - NTFS_PUTU8(rec + offset + 1, len & 0xff); - coffs = 1; - } else if (len >= -0x8000 && len <= 0x7fff) { - NTFS_PUTU16(rec + offset + 1, len & 0xffff); - coffs = 2; - } else if (len >= -0x800000 && len <= 0x7fffff) { - NTFS_PUTU24(rec + offset + 1, len & 0xffffff); - coffs = 3; - } else /* if (len >= -0x80000000LL && len <= 0x7fffffff */ { - NTFS_PUTU32(rec + offset + 1, len); - coffs = 4; - } /* else ... FIXME: When len becomes 64-bit we need to extend - * the else if () statements. (AIA) */ - *(rec + offset) |= coffs++; - if (rl[i].lcn == (ntfs_cluster_t)-1) /* Compressed run. */ - /* Nothing */; - else if (rclus >= -0x80 && rclus <= 0x7f) { - *(rec + offset) |= 0x10; - NTFS_PUTS8(rec + offset + coffs, rclus & 0xff); - coffs += 1; - } else if (rclus >= -0x8000 && rclus <= 0x7fff) { - *(rec + offset) |= 0x20; - NTFS_PUTS16(rec + offset + coffs, rclus & 0xffff); - coffs += 2; - } else if (rclus >= -0x800000 && rclus <= 0x7fffff) { - *(rec + offset) |= 0x30; - NTFS_PUTS24(rec + offset + coffs, rclus & 0xffffff); - coffs += 3; - } else /* if (rclus >= -0x80000000LL && rclus <= 0x7fffffff)*/ { - *(rec + offset) |= 0x40; - NTFS_PUTS32(rec + offset + coffs, rclus - /* & 0xffffffffLL */); - coffs += 4; - } /* FIXME: When rclus becomes 64-bit. 
- else if (rclus >= -0x8000000000 && rclus <= 0x7FFFFFFFFF) { - *(rec + offset) |= 0x50; - NTFS_PUTS40(rec + offset + coffs, rclus & - 0xffffffffffLL); - coffs += 5; - } else if (rclus >= -0x800000000000 && - rclus <= 0x7FFFFFFFFFFF) { - *(rec + offset) |= 0x60; - NTFS_PUTS48(rec + offset + coffs, rclus & - 0xffffffffffffLL); - coffs += 6; - } else if (rclus >= -0x80000000000000 && - rclus <= 0x7FFFFFFFFFFFFF) { - *(rec + offset) |= 0x70; - NTFS_PUTS56(rec + offset + coffs, rclus & - 0xffffffffffffffLL); - coffs += 7; + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) + NInoSetCompressed(ni); + if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "Found encrypted and " + "compressed attribute. Not " + "allowed."); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } + if (ctx->attr->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + ir = (INDEX_ROOT*)((char*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset)); + ir_end = (char*)ir + le32_to_cpu( + ctx->attr->data.resident.value_length); + if (ir_end > (char*)ctx->mrec + vol->mft_record_size) { + ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " + "corrupt."); + goto unm_err_out; + } + index_end = (char*)&ir->index + + le32_to_cpu(ir->index.index_length); + if (index_end > ir_end) { + ntfs_error(vi->i_sb, "Directory index is corrupt."); + goto unm_err_out; + } + if (ir->type != AT_FILE_NAME) { + ntfs_error(vi->i_sb, "Indexed attribute is not " + "$FILE_NAME. Not allowed."); + goto unm_err_out; + } + if (ir->collation_rule != COLLATION_FILE_NAME) { + ntfs_error(vi->i_sb, "Index collation rule is not " + "COLLATION_FILE_NAME. 
Not allowed."); + goto unm_err_out; + } + ni->itype.index.block_size = le32_to_cpu(ir->index_block_size); + if (ni->itype.index.block_size & + (ni->itype.index.block_size - 1)) { + ntfs_error(vi->i_sb, "Index block size (%u) is not a " + "power of two.", + ni->itype.index.block_size); + goto unm_err_out; + } + if (ni->itype.index.block_size > PAGE_CACHE_SIZE) { + ntfs_error(vi->i_sb, "Index block size (%u) > " + "PAGE_CACHE_SIZE (%ld) is not " + "supported. Sorry.", + ni->itype.index.block_size, + PAGE_CACHE_SIZE); + err = -EOPNOTSUPP; + goto unm_err_out; + } + if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) { + ntfs_error(vi->i_sb, "Index block size (%u) < " + "NTFS_BLOCK_SIZE (%i) is not " + "supported. Sorry.", + ni->itype.index.block_size, + NTFS_BLOCK_SIZE); + err = -EOPNOTSUPP; + goto unm_err_out; + } + ni->itype.index.block_size_bits = + ffs(ni->itype.index.block_size) - 1; + /* Determine the size of a vcn in the directory index. */ + if (vol->cluster_size <= ni->itype.index.block_size) { + ni->itype.index.vcn_size = vol->cluster_size; + ni->itype.index.vcn_size_bits = vol->cluster_size_bits; } else { - *(rec + offset) |= 0x80; - NTFS_PUTS64(rec + offset + coffs, rclus); - coffs += 8; - } */ - offset += coffs; - if (rl[i].lcn) - cluster = rl[i].lcn; - } - if (offset >= size) - return -E2BIG; - /* Terminating null. */ - *(rec + offset++) = 0; - *offs = offset; - return 0; -} + ni->itype.index.vcn_size = vol->sector_size; + ni->itype.index.vcn_size_bits = vol->sector_size_bits; + } -static void count_runs(ntfs_attribute *attr, char *buf) -{ - ntfs_u32 first, count, last, i; - - first = 0; - for (i = 0, count = 0; i < attr->d.r.len; i++) - count += attr->d.r.runlist[i].len; - last = first + count - 1; - NTFS_PUTU64(buf + 0x10, first); - NTFS_PUTU64(buf + 0x18, last); -} + /* Setup the index allocation attribute, even if not present. 
*/ + NInoSetMstProtected(ni); + ni->type = AT_INDEX_ALLOCATION; + ni->name = I30; + ni->name_len = 4; + + if (!(ir->index.flags & LARGE_INDEX)) { + /* No index allocation. */ + vi->i_size = ni->initialized_size = + ni->allocated_size = 0; + /* We are done with the mft record, so we release it. */ + put_attr_search_ctx(ctx); + unmap_mft_record(ni); + m = NULL; + ctx = NULL; + goto skip_large_dir_stuff; + } /* LARGE_INDEX: Index allocation present. Setup state. */ + NInoSetIndexAllocPresent(ni); + /* Find index allocation attribute. */ + reinit_attr_search_ctx(ctx); + if (!lookup_attr(AT_INDEX_ALLOCATION, I30, 4, CASE_SENSITIVE, + 0, NULL, 0, ctx)) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is not present but $INDEX_ROOT " + "indicated it is."); + goto unm_err_out; + } + if (!ctx->attr->non_resident) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is resident."); + goto unm_err_out; + } + if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is encrypted."); + goto unm_err_out; + } + if (ctx->attr->flags & ATTR_IS_SPARSE) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is sparse."); + goto unm_err_out; + } + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is compressed."); + goto unm_err_out; + } + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(vi->i_sb, "First extent of " + "$INDEX_ALLOCATION attribute has non " + "zero lowest_vcn. Inode is corrupt. " + "You should run chkdsk."); + goto unm_err_out; + } + vi->i_size = sle64_to_cpu( + ctx->attr->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu( + ctx->attr->data.non_resident.initialized_size); + ni->allocated_size = sle64_to_cpu( + ctx->attr->data.non_resident.allocated_size); + /* + * We are done with the mft record, so we release it. Otherwise + * we would deadlock in ntfs_attr_iget(). 
+ */ + put_attr_search_ctx(ctx); + unmap_mft_record(ni); + m = NULL; + ctx = NULL; + /* Get the index bitmap attribute inode. */ + bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4); + if (unlikely(IS_ERR(bvi))) { + ntfs_error(vi->i_sb, "Failed to get bitmap attribute."); + err = PTR_ERR(bvi); + goto unm_err_out; + } + ni->itype.index.bmp_ino = bvi; + bni = NTFS_I(bvi); + if (NInoCompressed(bni) || NInoEncrypted(bni) || + NInoSparse(bni)) { + ntfs_error(vi->i_sb, "$BITMAP attribute is compressed " + "and/or encrypted and/or sparse."); + goto unm_err_out; + } + /* Consistency check bitmap size vs. index allocation size. */ + if ((bvi->i_size << 3) < (vi->i_size >> + ni->itype.index.block_size_bits)) { + ntfs_error(vi->i_sb, "Index bitmap too small (0x%Lx) " + "for index allocation (0x%Lx).", + bvi->i_size << 3, vi->i_size); + goto unm_err_out; + } +skip_large_dir_stuff: + /* Everyone gets read and scan permissions. */ + vi->i_mode |= S_IRUGO | S_IXUGO; + /* If not read-only, set write permissions. */ + if (!IS_RDONLY(vi)) + vi->i_mode |= S_IWUGO; + /* + * Apply the directory permissions mask set in the mount + * options. + */ + vi->i_mode &= ~vol->dmask; + /* Setup the operations for this inode. */ + vi->i_op = &ntfs_dir_inode_ops; + vi->i_fop = &ntfs_dir_ops; + vi->i_mapping->a_ops = &ntfs_aops; + } else { + /* It is a file. */ + reinit_attr_search_ctx(ctx); + + /* Setup the data attribute, even if not present. */ + ni->type = AT_DATA; + ni->name = NULL; + ni->name_len = 0; + + /* Find first extent of the unnamed data attribute. */ + if (!lookup_attr(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx)) { + vi->i_size = ni->initialized_size = + ni->allocated_size = 0LL; + /* + * FILE_Secure does not have an unnamed $DATA + * attribute, so we special case it here. 
+ */ + if (vi->i_ino == FILE_Secure) + goto no_data_attr_special_case; + /* + * Most if not all the system files in the $Extend + * system directory do not have unnamed data + * attributes so we need to check if the parent + * directory of the file is FILE_Extend and if it is + * ignore this error. To do this we need to get the + * name of this inode from the mft record as the name + * contains the back reference to the parent directory. + */ + if (ntfs_is_extended_system_file(ctx) > 0) + goto no_data_attr_special_case; + // FIXME: File is corrupt! Hot-fix with empty data + // attribute if recovery option is set. + ntfs_error(vi->i_sb, "$DATA attribute is " + "missing."); + goto unm_err_out; + } + /* Setup the state. */ + if (ctx->attr->non_resident) { + NInoSetNonResident(ni); + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + NInoSetCompressed(ni); + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found " + "compressed data but " + "compression is disabled due " + "to cluster size (%i) > 4kiB.", + vol->cluster_size); + goto unm_err_out; + } + if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) + != ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found " + "unknown compression method or " + "corrupt file."); + goto unm_err_out; + } + ni->itype.compressed.block_clusters = 1U << + ctx->attr->data.non_resident. + compression_unit; + if (ctx->attr->data.non_resident. + compression_unit != 4) { + ntfs_error(vi->i_sb, "Found " + "nonstandard compression unit " + "(%u instead of 4). Cannot " + "handle this. This might " + "indicate corruption so you " + "should run chkdsk.", + ctx->attr->data.non_resident. + compression_unit); + err = -EOPNOTSUPP; + goto unm_err_out; + } + ni->itype.compressed.block_size = 1U << ( + ctx->attr->data.non_resident. 
+ compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = ffs( + ni->itype.compressed.block_size) - 1; + } + if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "Found encrypted " + "and compressed data."); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } + if (ctx->attr->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(vi->i_sb, "First extent of $DATA " + "attribute has non zero " + "lowest_vcn. Inode is corrupt. " + "You should run chkdsk."); + goto unm_err_out; + } + /* Setup all the sizes. */ + vi->i_size = sle64_to_cpu( + ctx->attr->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu( + ctx->attr->data.non_resident. + initialized_size); + ni->allocated_size = sle64_to_cpu( + ctx->attr->data.non_resident. + allocated_size); + if (NInoCompressed(ni)) { + ni->itype.compressed.size = sle64_to_cpu( + ctx->attr->data.non_resident. + compressed_size); + } + } else { /* Resident attribute. */ + /* + * Make all sizes equal for simplicity in read code + * paths. FIXME: Need to keep this in mind when + * converting to non-resident attribute in write code + * path. (Probably only affects truncate().) + */ + vi->i_size = ni->initialized_size = ni->allocated_size = + le32_to_cpu( + ctx->attr->data.resident.value_length); + } +no_data_attr_special_case: + /* We are done with the mft record, so we release it. */ + put_attr_search_ctx(ctx); + unmap_mft_record(ni); + m = NULL; + ctx = NULL; + /* Everyone gets all permissions. */ + vi->i_mode |= S_IRWXUGO; + /* If read-only, noone gets write permissions. */ + if (IS_RDONLY(vi)) + vi->i_mode &= ~S_IWUGO; + /* Apply the file permissions mask set in the mount options. */ + vi->i_mode &= ~vol->fmask; + /* Setup the operations for this inode. 
*/ + vi->i_op = &ntfs_file_inode_ops; + vi->i_fop = &ntfs_file_ops; + vi->i_mapping->a_ops = &ntfs_aops; + } + /* + * The number of 512-byte blocks used on disk (for stat). This is in so + * far inaccurate as it doesn't account for any named streams or other + * special non-resident attributes, but that is how Windows works, too, + * so we are at least consistent with Windows, if not entirely + * consistent with the Linux Way. Doing it the Linux Way would cause a + * significant slowdown as it would involve iterating over all + * attributes in the mft record and adding the allocated/compressed + * sizes of all non-resident attributes present to give us the Linux + * correct size that should go into i_blocks (after division by 512). + */ + if (S_ISDIR(vi->i_mode) || !NInoCompressed(ni)) + vi->i_blocks = ni->allocated_size >> 9; + else + vi->i_blocks = ni->itype.compressed.size >> 9; + + ntfs_debug("Done."); + return; +unm_err_out: + if (!err) + err = -EIO; + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(ni); +err_out: + ntfs_error(vi->i_sb, "Failed with error code %i. Marking inode 0x%lx " + "as bad.", -err, vi->i_ino); + make_bad_inode(vi); + return; +} /** - * layout_attr - convert in memory attribute to on disk attribute record - * @attr: in memory attribute to convert - * @buf: destination buffer for on disk attribute record - * @size: size of the destination buffer - * @psize: size of converted on disk attribute record (out variable) - * - * layout_attr() takes the attribute @attr and converts it into the appropriate - * on disk structure, writing it into @buf (up to @size bytes are written). - * - * On success we return 0 and set @*psize to the actual byte size of the on- - * disk attribute that was written into @buf. 
+ * ntfs_read_locked_attr_inode - read an attribute inode from its base inode + * @base_vi: base inode + * @vi: attribute inode to read + * + * ntfs_read_locked_attr_inode() is called from the ntfs_attr_iget() to read + * the attribute inode described by @vi into memory from the base mft record + * described by @base_ni. + * + * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for + * reading and looks up the attribute described by @vi before setting up the + * necessary fields in @vi as well as initializing the ntfs inode. + * + * Q: What locks are held when the function is called? + * A: i_state has I_LOCK set, hence the inode is locked, also + * i_count is set to 1, so it is not going to go away */ -static int layout_attr(ntfs_attribute *attr, char *buf, int size, int *psize) +static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) { - int nameoff, hdrsize, asize; - - if (attr->resident) { - nameoff = 0x18; - hdrsize = (nameoff + 2 * attr->namelen + 7) & ~7; - asize = (hdrsize + attr->size + 7) & ~7; - if (size < asize) - return -E2BIG; - NTFS_PUTU32(buf + 0x10, attr->size); - NTFS_PUTU8(buf + 0x16, attr->indexed); - NTFS_PUTU16(buf + 0x14, hdrsize); - if (attr->size) - ntfs_memcpy(buf + hdrsize, attr->d.data, attr->size); + ntfs_volume *vol = NTFS_SB(vi->i_sb); + ntfs_inode *ni, *base_ni; + MFT_RECORD *m; + attr_search_context *ctx; + int err = 0; + + ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); + + ntfs_init_big_inode(vi); + + ni = NTFS_I(vi); + base_ni = NTFS_I(base_vi); + + /* Just mirror the values from the base inode. */ + vi->i_blksize = base_vi->i_blksize; + vi->i_version = base_vi->i_version; + vi->i_uid = base_vi->i_uid; + vi->i_gid = base_vi->i_gid; + vi->i_nlink = base_vi->i_nlink; + vi->i_mtime = base_vi->i_mtime; + vi->i_ctime = base_vi->i_ctime; + vi->i_atime = base_vi->i_atime; + ni->seq_no = base_ni->seq_no; + + /* Set inode type to zero but preserve permissions. 
*/ + vi->i_mode = base_vi->i_mode & ~S_IFMT; + + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + goto err_out; + } + ctx = get_attr_search_ctx(base_ni, m); + if (!ctx) { + err = -ENOMEM; + goto unm_err_out; + } + + /* Find the attribute. */ + if (!lookup_attr(ni->type, ni->name, ni->name_len, IGNORE_CASE, 0, + NULL, 0, ctx)) + goto unm_err_out; + + if (!ctx->attr->non_resident) { + if (NInoMstProtected(ni) || ctx->attr->flags) { + ntfs_error(vi->i_sb, "Found mst protected attribute " + "or attribute with non-zero flags but " + "the attribute is resident (mft_no " + "0x%lx, type 0x%x, name_len %i). " + "Please report you saw this message " + "to linux-ntfs-dev@lists.sf.net", + vi->i_ino, ni->type, ni->name_len); + goto unm_err_out; + } + /* + * Resident attribute. Make all sizes equal for simplicity in + * read code paths. + */ + vi->i_size = ni->initialized_size = ni->allocated_size = + le32_to_cpu(ctx->attr->data.resident.value_length); } else { - int error; + NInoSetNonResident(ni); + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + if (NInoMstProtected(ni)) { + ntfs_error(vi->i_sb, "Found mst protected " + "attribute but the attribute " + "is compressed (mft_no 0x%lx, " + "type 0x%x, name_len %i). " + "Please report you saw this " + "message to linux-ntfs-dev@" + "lists.sf.net", vi->i_ino, + ni->type, ni->name_len); + goto unm_err_out; + } + NInoSetCompressed(ni); + if ((ni->type != AT_DATA) || (ni->type == AT_DATA && + ni->name_len)) { + ntfs_error(vi->i_sb, "Found compressed non-" + "data or named data attribute " + "(mft_no 0x%lx, type 0x%x, " + "name_len %i). 
Please report " + "you saw this message to " + "linux-ntfs-dev@lists.sf.net", + vi->i_ino, ni->type, + ni->name_len); + goto unm_err_out; + } + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found " + "compressed attribute but " + "compression is disabled due " + "to cluster size (%i) > 4kiB.", + vol->cluster_size); + goto unm_err_out; + } + if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) + != ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found unknown " + "compression method or " + "corrupt file."); + goto unm_err_out; + } + ni->itype.compressed.block_clusters = 1U << + ctx->attr->data.non_resident. + compression_unit; + if (ctx->attr->data.non_resident.compression_unit != 4) { + ntfs_error(vi->i_sb, "Found " + "nonstandard compression unit " + "(%u instead of 4). Cannot " + "handle this. This might " + "indicate corruption so you " + "should run chkdsk.", + ctx->attr->data.non_resident. + compression_unit); + err = -EOPNOTSUPP; + goto unm_err_out; + } + ni->itype.compressed.block_size = 1U << ( + ctx->attr->data.non_resident. + compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = ffs( + ni->itype.compressed.block_size) - 1; + } + if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "Found encrypted " + "and compressed data."); + goto unm_err_out; + } + if (NInoMstProtected(ni)) { + ntfs_error(vi->i_sb, "Found mst protected " + "attribute but the attribute " + "is encrypted (mft_no 0x%lx, " + "type 0x%x, name_len %i). " + "Please report you saw this " + "message to linux-ntfs-dev@" + "lists.sf.net", vi->i_ino, + ni->type, ni->name_len); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } + if (ctx->attr->flags & ATTR_IS_SPARSE) { + if (NInoMstProtected(ni)) { + ntfs_error(vi->i_sb, "Found mst protected " + "attribute but the attribute " + "is sparse (mft_no 0x%lx, " + "type 0x%x, name_len %i). 
" + "Please report you saw this " + "message to linux-ntfs-dev@" + "lists.sf.net", vi->i_ino, + ni->type, ni->name_len); + goto unm_err_out; + } + NInoSetSparse(ni); + } + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(vi->i_sb, "First extent of attribute has " + "non-zero lowest_vcn. Inode is " + "corrupt. You should run chkdsk."); + goto unm_err_out; + } + /* Setup all the sizes. */ + vi->i_size = sle64_to_cpu( + ctx->attr->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu( + ctx->attr->data.non_resident.initialized_size); + ni->allocated_size = sle64_to_cpu( + ctx->attr->data.non_resident.allocated_size); + if (NInoCompressed(ni)) { + ni->itype.compressed.size = sle64_to_cpu( + ctx->attr->data.non_resident.compressed_size); + } + } + + /* Setup the operations for this attribute inode. */ + vi->i_op = NULL; + vi->i_fop = NULL; + vi->i_mapping->a_ops = &ntfs_aops; - if (attr->flags & ATTR_IS_COMPRESSED) - nameoff = 0x48; - else - nameoff = 0x40; - hdrsize = (nameoff + 2 * attr->namelen + 7) & ~7; - if (size < hdrsize) - return -E2BIG; - /* Make asize point at the end of the attribute record header, - i.e. at the beginning of the mapping pairs array. */ - asize = hdrsize; - error = layout_runs(attr, buf, &asize, size); - /* Now, asize points one byte beyond the end of the mapping - pairs array. */ - if (error) - return error; - /* The next attribute has to begin on 8-byte boundary. */ - asize = (asize + 7) & ~7; - /* FIXME: fragments */ - count_runs(attr, buf); - NTFS_PUTU16(buf + 0x20, hdrsize); - NTFS_PUTU16(buf + 0x22, attr->cengine); - NTFS_PUTU32(buf + 0x24, 0); - NTFS_PUTS64(buf + 0x28, attr->allocated); - NTFS_PUTS64(buf + 0x30, attr->size); - NTFS_PUTS64(buf + 0x38, attr->initialized); - if (attr->flags & ATTR_IS_COMPRESSED) - NTFS_PUTS64(buf + 0x40, attr->compsize); - } - NTFS_PUTU32(buf, attr->type); - NTFS_PUTU32(buf + 4, asize); - NTFS_PUTU8(buf + 8, attr->resident ? 
0 : 1); - NTFS_PUTU8(buf + 9, attr->namelen); - NTFS_PUTU16(buf + 0xa, nameoff); - NTFS_PUTU16(buf + 0xc, attr->flags); - NTFS_PUTU16(buf + 0xe, attr->attrno); - if (attr->namelen) - ntfs_memcpy(buf + nameoff, attr->name, 2 * attr->namelen); - *psize = asize; + if (!NInoCompressed(ni)) + vi->i_blocks = ni->allocated_size >> 9; + else + vi->i_blocks = ni->itype.compressed.size >> 9; + + /* + * Make sure the base inode doesn't go away and attach it to the + * attribute inode. + */ + igrab(base_vi); + ni->ext.base_ntfs_ino = base_ni; + ni->nr_extents = -1; + + put_attr_search_ctx(ctx); + unmap_mft_record(base_ni); + + ntfs_debug("Done."); return 0; + +unm_err_out: + if (!err) + err = -EIO; + if (ctx) + put_attr_search_ctx(ctx); + unmap_mft_record(base_ni); +err_out: + ntfs_error(vi->i_sb, "Failed with error code %i while reading " + "attribute inode (mft_no 0x%lx, type 0x%x, name_len " + "%i.", -err, vi->i_ino, ni->type, ni->name_len); + make_bad_inode(vi); + return err; } /** - * layout_inode - convert an in-memory inode into on disk mft record(s) - * @ino: in memory inode to convert - * @store: on disk inode, contain buffers for the on disk mft record(s) - * - * layout_inode takes the in memory inode @ino, converts it into a (sequence of) - * mft record(s) and writes them to the appropriate buffers in the @store. - * - * Return 0 on success, - * the required mft record count (>0) if the inode does not fit, - * -ENOMEM if memory allocation problem, or - * -EOPNOTSUP if beyond our capabilities. + * ntfs_read_inode_mount - special read_inode for mount time use only + * @vi: inode to read * - * TODO: We at the moment do not support extension mft records. (AIA) + * Read inode FILE_MFT at mount time, only called with super_block lock + * held from within the read_super() code path. 
+ * + * This function exists because when it is called the page cache for $MFT/$DATA + * is not initialized and hence we cannot get at the contents of mft records + * by calling map_mft_record*(). + * + * Further, it needs to cope with the circular references problem, i.e. we + * cannot load any attributes other than $ATTRIBUTE_LIST until $DATA is + * loaded, because we do not yet know where the other extent mft records are + * and, again, because we cannot call map_mft_record*() yet. Obviously this + * applies only when an attribute list is actually present in the $MFT inode. + * + * We solve these problems by starting with the $DATA attribute before anything + * else and iterating using lookup_attr($DATA) over all extents. As each extent + * is found, we call decompress_mapping_pairs(), which includes the implied + * merge_run_lists(). Each step of the iteration necessarily provides + * sufficient information for the next step to complete. + * + * This should work, but there are two possible pitfalls (see inline comments + * below); only time will tell if they are real pits or just smoke...
*/ -int layout_inode(ntfs_inode *ino, ntfs_disk_inode *store) +void ntfs_read_inode_mount(struct inode *vi) { - int offset, i, size, psize, error, count, recno; - ntfs_attribute *attr; - unsigned char *rec; - - error = allocate_store(ino->vol, store, ino->record_count); - if (error) - return error; - size = ino->vol->mft_record_size; - count = i = 0; - do { - if (count < ino->record_count) { - recno = ino->records[count]; - } else { - error = allocate_store(ino->vol, store, count + 1); - if (error) - return error; - recno = -1; + VCN next_vcn, last_vcn, highest_vcn; + s64 block; + struct super_block *sb = vi->i_sb; + ntfs_volume *vol = NTFS_SB(sb); + struct buffer_head *bh; + ntfs_inode *ni; + MFT_RECORD *m = NULL; + ATTR_RECORD *attr; + attr_search_context *ctx; + unsigned int i, nr_blocks; + int err; + ntfs_attr na; + + ntfs_debug("Entering."); + + if (vi->i_ino != FILE_MFT) { + ntfs_error(sb, "Called for inode 0x%lx but only inode %d " + "allowed.", vi->i_ino, FILE_MFT); + goto err_out; + } + + /* Initialize the ntfs specific part of @vi. */ + ntfs_init_big_inode(vi); + + ni = NTFS_I(vi); + ni->state = 0; + /* Setup the data attribute. It is special as it is mst protected. */ + NInoSetNonResident(ni); + NInoSetMstProtected(ni); + na.mft_no = ni->mft_no; + na.type = ni->type = AT_DATA; + na.name = ni->name = NULL; + na.name_len = ni->name_len = 0; + + /* + * This sets up our little cheat allowing us to reuse the async io + * completion handler for directories. + */ + ni->itype.index.block_size = vol->mft_record_size; + ni->itype.index.block_size_bits = vol->mft_record_size_bits; + + /* Very important! Needed to be able to call map_mft_record*(). */ + vol->mft_ino = vi; + + /* Allocate enough memory to read the first mft record. 
*/ + if (vol->mft_record_size > 64 * 1024) { + ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).", + vol->mft_record_size); + goto err_out; + } + i = vol->mft_record_size; + if (i < sb->s_blocksize) + i = sb->s_blocksize; + m = (MFT_RECORD*)ntfs_malloc_nofs(i); + if (!m) { + ntfs_error(sb, "Failed to allocate buffer for $MFT record 0."); + goto err_out; + } + + /* Determine the first block of the $MFT/$DATA attribute. */ + block = vol->mft_lcn << vol->cluster_size_bits >> + sb->s_blocksize_bits; + nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits; + if (!nr_blocks) + nr_blocks = 1; + + /* Load $MFT/$DATA's first mft record. */ + for (i = 0; i < nr_blocks; i++) { + bh = sb_bread(sb, block++); + if (!bh) { + ntfs_error(sb, "Device read failed."); + goto err_out; + } + memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data, + sb->s_blocksize); + brelse(bh); + } + + /* Apply the mst fixups. */ + if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) { + /* FIXME: Try to use the $MFTMirr now. */ + ntfs_error(sb, "MST fixup failed. $MFT is corrupt."); + goto err_out; + } + + /* Need this to sanity check attribute list references to $MFT. */ + ni->seq_no = le16_to_cpu(m->sequence_number); + + /* Provides readpage() and sync_page() for map_mft_record(). */ + vi->i_mapping->a_ops = &ntfs_mft_aops; + + ctx = get_attr_search_ctx(ni, m); + if (!ctx) { + err = -ENOMEM; + goto err_out; + } + + /* Find the attribute list attribute if present. */ + if (lookup_attr(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx)) { + ATTR_LIST_ENTRY *al_entry, *next_al_entry; + u8 *al_end; + + ntfs_debug("Attribute list attribute found in $MFT."); + NInoSetAttrList(ni); + if (ctx->attr->flags & ATTR_IS_ENCRYPTED || + ctx->attr->flags & ATTR_COMPRESSION_MASK || + ctx->attr->flags & ATTR_IS_SPARSE) { + ntfs_error(sb, "Attribute list attribute is " + "compressed/encrypted/sparse. Not " + "allowed. $MFT is corrupt. 
You should " + "run chkdsk."); + goto put_err_out; + } + /* Now allocate memory for the attribute list. */ + ni->attr_list_size = (u32)attribute_value_length(ctx->attr); + ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); + if (!ni->attr_list) { + ntfs_error(sb, "Not enough memory to allocate buffer " + "for attribute list."); + goto put_err_out; + } + if (ctx->attr->non_resident) { + NInoSetAttrListNonResident(ni); + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(sb, "Attribute list has non zero " + "lowest_vcn. $MFT is corrupt. " + "You should run chkdsk."); + goto put_err_out; + } + /* Setup the run list. */ + ni->attr_list_rl.rl = decompress_mapping_pairs(vol, + ctx->attr, NULL); + if (IS_ERR(ni->attr_list_rl.rl)) { + err = PTR_ERR(ni->attr_list_rl.rl); + ni->attr_list_rl.rl = NULL; + ntfs_error(sb, "Mapping pairs decompression " + "failed with error code %i.", + -err); + goto put_err_out; + } + /* Now load the attribute list. */ + if ((err = load_attribute_list(vol, &ni->attr_list_rl, + ni->attr_list, ni->attr_list_size, + sle64_to_cpu(ctx->attr->data. + non_resident.initialized_size)))) { + ntfs_error(sb, "Failed to load attribute list " + "attribute with error code %i.", + -err); + goto put_err_out; + } + } else /* if (!ctx.attr->non_resident) */ { + if ((u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset) + + le32_to_cpu( + ctx->attr->data.resident.value_length) > + (u8*)ctx->mrec + vol->mft_record_size) { + ntfs_error(sb, "Corrupt attribute list " + "attribute."); + goto put_err_out; + } + /* Now copy the attribute list. */ + memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset), + le32_to_cpu( + ctx->attr->data.resident.value_length)); } + /* The attribute list is now setup in memory. */ /* - * FIXME: We need to support extension records properly. - * At the moment they wouldn't work. Probably would "just" get - * corrupted if we write to them... 
(AIA) + * FIXME: I don't know if this case is actually possible. + * According to logic it is not possible but I have seen too + * many weird things in MS software to rely on logic... Thus we + * perform a manual search and make sure the first $MFT/$DATA + * extent is in the base inode. If it is not we abort with an + * error and if we ever see a report of this error we will need + * to do some magic in order to have the necessary mft record + * loaded and in the right place in the page cache. But + * hopefully logic will prevail and this never happens... */ - store->records[count].recno = recno; - rec = store->records[count].record; - count++; - /* Copy mft record header. */ - offset = NTFS_GETU16(ino->attr + 0x14); /* attrs_offset */ - ntfs_memcpy(rec, ino->attr, offset); - /* Copy attributes. */ - while (i < ino->attr_count) { - attr = ino->attrs + i; - error = layout_attr(attr, rec + offset, - size - offset - 8, &psize); - if (error == -E2BIG && offset != NTFS_GETU16(ino->attr - + 0x14)) - break; - if (error) - return error; - offset += psize; - i++; - } - /* Terminating attribute. */ - NTFS_PUTU32(rec + offset, 0xFFFFFFFF); - offset += 4; - NTFS_PUTU32(rec + offset, 0); - offset += 4; - NTFS_PUTU32(rec + 0x18, offset); - } while (i < ino->attr_count || count < ino->record_count); - return count - ino->record_count; -} - -/* - * FIXME: ntfs_update_inode() calls layout_inode() to create the mft record on - * disk structure corresponding to the inode @ino. After that, ntfs_write_attr() - * is called to write out the created mft record to disk. - * We shouldn't need to re-layout every single time we are updating an mft - * record. No wonder the ntfs driver is slow like hell. 
(AIA) - */ -int ntfs_update_inode(ntfs_inode *ino) -{ - int error, i; - ntfs_disk_inode store; - ntfs_io io; - - ntfs_bzero(&store, sizeof(store)); - error = layout_inode(ino, &store); - if (error == -E2BIG) { - i = ntfs_split_indexroot(ino); - if (i != -ENOTDIR) { - if (!i) - i = layout_inode(ino, &store); - error = i; - } - } - if (error == -E2BIG) { - error = ntfs_attr_allnonresident(ino); - if (!error) - error = layout_inode(ino, &store); - } - if (error > 0) { - /* FIXME: Introduce extension records. */ - error = -E2BIG; - } - if (error) { - if (error == -E2BIG) - ntfs_error("Cannot handle saving inode 0x%x.\n", - ino->i_number); - deallocate_store(&store); - return error; - } - io.fn_get = ntfs_get; - io.fn_put = 0; - for (i = 0; i < store.count; i++) { - error = ntfs_insert_fixups(store.records[i].record, - ino->vol->mft_record_size); - if (error) { - printk(KERN_ALERT "NTFS: ntfs_update_inode() caught " - "corrupt %s mtf record ntfs record " - "header. Refusing to write corrupt " - "data to disk. Unmount and run chkdsk " - "immediately!\n", i ? "extension": - "base"); - deallocate_store(&store); - return -EIO; - } - io.param = store.records[i].record; - io.size = ino->vol->mft_record_size; - error = ntfs_write_attr(ino->vol->mft_ino, ino->vol->at_data, - 0, (__s64)store.records[i].recno << - ino->vol->mft_record_size_bits, &io); - if (error || io.size != ino->vol->mft_record_size) { - /* Big trouble, partially written file. */ - ntfs_error("Please unmount: Write error in inode " - "0x%x\n", ino->i_number); - deallocate_store(&store); - return error ? error : -EIO; + al_entry = (ATTR_LIST_ENTRY*)ni->attr_list; + al_end = (u8*)al_entry + ni->attr_list_size; + for (;; al_entry = next_al_entry) { + /* Out of bounds check. */ + if ((u8*)al_entry < ni->attr_list || + (u8*)al_entry > al_end) + goto em_put_err_out; + /* Catch the end of the attribute list. 
*/ + if ((u8*)al_entry == al_end) + goto em_put_err_out; + if (!al_entry->length) + goto em_put_err_out; + if ((u8*)al_entry + 6 > al_end || (u8*)al_entry + + le16_to_cpu(al_entry->length) > al_end) + goto em_put_err_out; + next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + + le16_to_cpu(al_entry->length)); + if (le32_to_cpu(al_entry->type) > + const_le32_to_cpu(AT_DATA)) + goto em_put_err_out; + if (AT_DATA != al_entry->type) + continue; + /* We want an unnamed attribute. */ + if (al_entry->name_length) + goto em_put_err_out; + /* Want the first entry, i.e. lowest_vcn == 0. */ + if (al_entry->lowest_vcn) + goto em_put_err_out; + /* First entry has to be in the base mft record. */ + if (MREF_LE(al_entry->mft_reference) != vi->i_ino) { + /* MFT references do not match, logic fails. */ + ntfs_error(sb, "BUG: The first $DATA extent " + "of $MFT is not in the base " + "mft record. Please report " + "you saw this message to " + "linux-ntfs-dev@lists.sf.net"); + goto put_err_out; + } else { + /* Sequence numbers must match. */ + if (MSEQNO_LE(al_entry->mft_reference) != + ni->seq_no) + goto em_put_err_out; + /* Got it. All is ok. We can stop now. */ + break; + } } } - deallocate_store(&store); - return 0; -} -void ntfs_decompress(unsigned char *dest, unsigned char *src, ntfs_size_t l) -{ - int head, comp; - int copied = 0; - unsigned char *stop; - int bits; - int tag = 0; - int clear_pos; - - while (1) { - head = NTFS_GETU16(src) & 0xFFF; - /* High bit indicates that compression was performed. */ - comp = NTFS_GETU16(src) & 0x8000; - src += 2; - stop = src + head; - bits = 0; - clear_pos = 0; - if (head == 0) - /* Block is not used. 
*/ - return;/* FIXME: copied */ - if (!comp) { /* uncompressible */ - ntfs_memcpy(dest, src, 0x1000); - dest += 0x1000; - copied += 0x1000; - src += 0x1000; - if (l == copied) - return; - continue; - } - while (src <= stop) { - if (clear_pos > 4096) { - ntfs_error("Error 1 in decompress\n"); - return; - } - if (!bits) { - tag = NTFS_GETU8(src); - bits = 8; - src++; - if (src > stop) - break; - } - if (tag & 1) { - int i, len, delta, code, lmask, dshift; - code = NTFS_GETU16(src); - src += 2; - if (!clear_pos) { - ntfs_error("Error 2 in decompress\n"); - return; - } - for (i = clear_pos - 1, lmask = 0xFFF, - dshift = 12; i >= 0x10; i >>= 1) { - lmask >>= 1; - dshift--; - } - delta = code >> dshift; - len = (code & lmask) + 3; - for (i = 0; i < len; i++) { - dest[clear_pos] = dest[clear_pos - - delta - 1]; - clear_pos++; - copied++; - if (copied==l) - return; - } - } else { - dest[clear_pos++] = NTFS_GETU8(src); - src++; - copied++; - if (copied==l) - return; + reinit_attr_search_ctx(ctx); + + /* Now load all attribute extents. */ + attr = NULL; + next_vcn = last_vcn = highest_vcn = 0; + while (lookup_attr(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0, ctx)) { + run_list_element *nrl; + + /* Cache the current attribute. */ + attr = ctx->attr; + /* $MFT must be non-resident. */ + if (!attr->non_resident) { + ntfs_error(sb, "$MFT must be non-resident but a " + "resident extent was found. $MFT is " + "corrupt. Run chkdsk."); + goto put_err_out; + } + /* $MFT must be uncompressed and unencrypted. */ + if (attr->flags & ATTR_COMPRESSION_MASK || + attr->flags & ATTR_IS_ENCRYPTED || + attr->flags & ATTR_IS_SPARSE) { + ntfs_error(sb, "$MFT must be uncompressed, " + "non-sparse, and unencrypted but a " + "compressed/sparse/encrypted extent " + "was found. $MFT is corrupt. Run " + "chkdsk."); + goto put_err_out; + } + /* + * Decompress the mapping pairs array of this extent and merge + * the result into the existing run list. 
No need for locking + * as we have exclusive access to the inode at this time and we + * are a mount in progress task, too. + */ + nrl = decompress_mapping_pairs(vol, attr, ni->run_list.rl); + if (IS_ERR(nrl)) { + ntfs_error(sb, "decompress_mapping_pairs() failed with " + "error code %ld. $MFT is corrupt.", + PTR_ERR(nrl)); + goto put_err_out; + } + ni->run_list.rl = nrl; + + /* Are we in the first extent? */ + if (!next_vcn) { + u64 ll; + + if (attr->data.non_resident.lowest_vcn) { + ntfs_error(sb, "First extent of $DATA " + "attribute has non zero " + "lowest_vcn. $MFT is corrupt. " + "You should run chkdsk."); + goto put_err_out; + } + /* Get the last vcn in the $DATA attribute. */ + last_vcn = sle64_to_cpu( + attr->data.non_resident.allocated_size) + >> vol->cluster_size_bits; + /* Fill in the inode size. */ + vi->i_size = sle64_to_cpu( + attr->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu(attr->data. + non_resident.initialized_size); + ni->allocated_size = sle64_to_cpu( + attr->data.non_resident.allocated_size); + /* Set the number of mft records. */ + ll = vi->i_size >> vol->mft_record_size_bits; + /* + * Verify the number of mft records does not exceed + * 2^32 - 1. + */ + if (ll >= (1ULL << 32)) { + ntfs_error(sb, "$MFT is too big! Aborting."); + goto put_err_out; + } + vol->nr_mft_records = ll; + /* + * We have got the first extent of the run_list for + * $MFT which means it is now relatively safe to call + * the normal ntfs_read_inode() function. Thus, take + * us out of the calling chain. Also we need to do this + * now because we need ntfs_read_inode() in place to + * get at subsequent extents. + */ + sb->s_op = &ntfs_sops; + /* + * Complete reading the inode, this will actually + * re-read the mft record for $MFT, this time entering + * it into the page cache with which we complete the + * kick start of the volume. 
It should be safe to do + * this now as the first extent of $MFT/$DATA is + * already known and we would hope that we don't need + * further extents in order to find the other + * attributes belonging to $MFT. Only time will tell if + * this is really the case. If not we will have to play + * magic at this point, possibly duplicating a lot of + * ntfs_read_inode() at this point. We will need to + * ensure we do enough of its work to be able to call + * ntfs_read_inode() on extents of $MFT/$DATA. But lets + * hope this never happens... + */ + ntfs_read_locked_inode(vi, &na); + if (is_bad_inode(vi)) { + ntfs_error(sb, "ntfs_read_inode() of $MFT " + "failed. BUG or corrupt $MFT. " + "Run chkdsk and if no errors " + "are found, please report you " + "saw this message to " + "linux-ntfs-dev@lists.sf.net"); + put_attr_search_ctx(ctx); + /* Revert to the safe super operations. */ + sb->s_op = &ntfs_mount_sops; + goto out_now; } - tag >>= 1; - bits--; + /* + * Re-initialize some specifics about $MFT's inode as + * ntfs_read_inode() will have set up the default ones. + */ + /* Set uid and gid to root. */ + vi->i_uid = vi->i_gid = 0; + /* Regular file. No access for anyone. */ + vi->i_mode = S_IFREG; + /* No VFS initiated operations allowed for $MFT. */ + vi->i_op = &ntfs_empty_inode_ops; + vi->i_fop = &ntfs_empty_file_ops; + /* Put back our special address space operations. */ + vi->i_mapping->a_ops = &ntfs_mft_aops; + } + + /* Get the lowest vcn for the next extent. */ + highest_vcn = sle64_to_cpu(attr->data.non_resident.highest_vcn); + next_vcn = highest_vcn + 1; + + /* Only one extent or error, which we catch below. */ + if (next_vcn <= 0) + break; + + /* Avoid endless loops due to corruption. */ + if (next_vcn < sle64_to_cpu( + attr->data.non_resident.lowest_vcn)) { + ntfs_error(sb, "$MFT has corrupt attribute list " + "attribute. Run chkdsk."); + goto put_err_out; } - dest += clear_pos; } + if (!attr) { + ntfs_error(sb, "$MFT/$DATA attribute not found. 
$MFT is " + "corrupt. Run chkdsk."); + goto put_err_out; + } + if (highest_vcn && highest_vcn != last_vcn - 1) { + ntfs_error(sb, "Failed to load the complete run list " + "for $MFT/$DATA. Driver bug or " + "corrupt $MFT. Run chkdsk."); + ntfs_debug("highest_vcn = 0x%Lx, last_vcn - 1 = 0x%Lx", + (long long)highest_vcn, + (long long)last_vcn - 1); + goto put_err_out; + } + put_attr_search_ctx(ctx); + ntfs_debug("Done."); +out_now: + ntfs_free(m); + return; +em_put_err_out: + ntfs_error(sb, "Couldn't find first extent of $DATA attribute in " + "attribute list. $MFT is corrupt. Run chkdsk."); +put_err_out: + put_attr_search_ctx(ctx); +err_out: + /* Make sure we revert to the safe super operations. */ + sb->s_op = &ntfs_mount_sops; + ntfs_error(sb, "Failed. Marking inode as bad."); + make_bad_inode(vi); + goto out_now; } -/* - * NOTE: Neither of the ntfs_*_bit functions are atomic! But we don't need - * them atomic at present as we never operate on shared/cached bitmaps. +/** + * ntfs_dirty_inode - mark the inode's metadata dirty + * @vi: inode to mark dirty + * + * This is called from fs/inode.c::__mark_inode_dirty(), when the inode itself + * is being marked dirty. An example is when UPDATE_ATIME() is invoked. + * + * We mark the inode dirty by setting both the page in which the mft record + * resides and the buffer heads in that page which correspond to the mft record + * dirty. This ensures that the changes will eventually be propagated to disk + * when the inode is set dirty. + * + * FIXME: Can we do that with the buffer heads? I am not too sure. Because if we + * do that we need to make sure that the kernel will not write out those buffer + * heads or we are screwed as it will write corrupt data to disk. The only way + * a mft record can be written correctly is by mst protecting it, writing it + * synchronously and fast mst deprotecting it.
During this period, obviously, + * the mft record must be marked as not uptodate, be locked for writing or + * whatever, so that nobody attempts anything stupid. + * + * FIXME: Do we need to check that the fs is not mounted read only? And what + * about the inode? Anything else? + * + * FIXME: As we are only a read only driver it is safe to just return here for + * the moment. */ -static __inline__ int ntfs_test_bit(unsigned char *byte, const int bit) +void ntfs_dirty_inode(struct inode *vi) { - return byte[bit >> 3] & (1 << (bit & 7)) ? 1 : 0; + ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); + NInoSetDirty(NTFS_I(vi)); + return; } -static __inline__ void ntfs_set_bit(unsigned char *byte, const int bit) +/** + * ntfs_commit_inode - write out a dirty inode + * @ni: inode to write out + * + */ +int ntfs_commit_inode(ntfs_inode *ni) { - byte[bit >> 3] |= 1 << (bit & 7); + ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); + NInoClearDirty(ni); + return 0; } -static __inline__ void ntfs_clear_bit(unsigned char *byte, const int bit) +/** + * ntfs_put_inode - handler for when the inode reference count is decremented + * @vi: vfs inode + * + * The VFS calls ntfs_put_inode() every time the inode reference count (i_count) + * is about to be decremented (but before the decrement itself). + * + * If the inode @vi is a directory with a single reference, we need to put the + * attribute inode for the directory index bitmap, if it is present, otherwise + * the directory inode would remain pinned for ever (or rather until umount() + * time). + */ +void ntfs_put_inode(struct inode *vi) { - byte[bit >> 3] &= ~(1 << (bit & 7)); -} + if (S_ISDIR(vi->i_mode) && (atomic_read(&vi->i_count) == 2)) { + ntfs_inode *ni; -static __inline__ int ntfs_test_and_clear_bit(unsigned char *byte, - const int bit) -{ - unsigned char *ptr = byte + (bit >> 3); - int b = 1 << (bit & 7); - int oldbit = *ptr & b ?
1 : 0; - *ptr &= ~b; - return oldbit; + ni = NTFS_I(vi); + if (NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) { + iput(ni->itype.index.bmp_ino); + ni->itype.index.bmp_ino = NULL; + } + } + return; } -static void dump_runlist(const ntfs_runlist *rl, const int rlen) +void __ntfs_clear_inode(ntfs_inode *ni) { -#ifdef DEBUG - int i; - ntfs_cluster_t ct; + int err; - ntfs_debug(DEBUG_OTHER, "%s(): rlen = %i.\n", __FUNCTION__, rlen); - ntfs_debug(DEBUG_OTHER, "VCN LCN Run length\n"); - for (i = 0, ct = 0; i < rlen; ct += rl[i++].len) { - if (rl[i].lcn == (ntfs_cluster_t)-1) - ntfs_debug(DEBUG_OTHER, "0x%-8x LCN_HOLE 0x%-8x " - "(%s)\n", ct, rl[i].len, rl[i].len ? - "sparse run" : "run list end"); - else - ntfs_debug(DEBUG_OTHER, "0x%-8x 0x%-8x 0x%-8x%s\n", ct, - rl[i].lcn, rl[i].len, rl[i].len && - i + 1 < rlen ? "" : " (run list end)"); - if (!rl[i].len) - break; + ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); + if (NInoDirty(ni)) { + err = ntfs_commit_inode(ni); + if (err) { + ntfs_error(ni->vol->sb, "Failed to commit dirty " + "inode synchronously."); + // FIXME: Do something!!! + } + } + /* Synchronize with ntfs_commit_inode(). */ + down(&ni->mrec_lock); + up(&ni->mrec_lock); + if (NInoDirty(ni)) { + ntfs_error(ni->vol->sb, "Failed to commit dirty inode " + "asynchronously."); + // FIXME: Do something!!! + } + /* No need to lock at this stage as no one else has a reference. */ + if (ni->nr_extents > 0) { + int i; + + // FIXME: Handle dirty case for each extent inode! + for (i = 0; i < ni->nr_extents; i++) + ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]); + kfree(ni->ext.extent_ntfs_inos); + } + /* Free all allocated memory.
*/ + down_write(&ni->run_list.lock); + if (ni->run_list.rl) { + ntfs_free(ni->run_list.rl); + ni->run_list.rl = NULL; + } + up_write(&ni->run_list.lock); + + if (ni->attr_list) { + ntfs_free(ni->attr_list); + ni->attr_list = NULL; + } + + down_write(&ni->attr_list_rl.lock); + if (ni->attr_list_rl.rl) { + ntfs_free(ni->attr_list_rl.rl); + ni->attr_list_rl.rl = NULL; + } + up_write(&ni->attr_list_rl.lock); + + if (ni->name_len && ni->name != I30) { + /* Catch bugs... */ + BUG_ON(!ni->name); + kfree(ni->name); } -#endif } -/** - * splice_runlists - splice two run lists into one - * @rl1: pointer to address of first run list - * @r1len: number of elementfs in first run list - * @rl2: pointer to second run list - * @r2len: number of elements in second run list - * - * Append the run list @rl2 to the run list *@rl1 and return the result in - * *@rl1 and *@r1len. - * - * Return 0 on success or -errno on error, in which case *@rl1 and *@r1len are - * left untouched. - * - * The only possible error code at the moment is -ENOMEM and only happens if - * there is insufficient memory to allocate the new run list (only happens - * when size of (rl1 + rl2) > allocated size of rl1). 
- */ -int splice_runlists(ntfs_runlist **rl1, int *r1len, const ntfs_runlist *rl2, - int r2len) +void ntfs_clear_extent_inode(ntfs_inode *ni) { - ntfs_runlist *rl; - int rlen, rl_size, rl2_pos; + __ntfs_clear_inode(ni); - ntfs_debug(DEBUG_OTHER, "%s(): Entering with *r1len = %i, " - "r2len = %i.\n", __FUNCTION__, *r1len, r2len); - ntfs_debug(DEBUG_OTHER, "%s(): Dumping 1st runlist.\n", __FUNCTION__); - if (*rl1) - dump_runlist(*rl1, *r1len); - else - ntfs_debug(DEBUG_OTHER, "%s(): Not present.\n", __FUNCTION__); - ntfs_debug(DEBUG_OTHER, "%s(): Dumping 2nd runlist.\n", __FUNCTION__); - dump_runlist(rl2, r2len); - rlen = *r1len + r2len + 1; - rl_size = (rlen * sizeof(ntfs_runlist) + PAGE_SIZE - 1) & - PAGE_MASK; - ntfs_debug(DEBUG_OTHER, "%s(): rlen = %i, rl_size = %i.\n", - __FUNCTION__, rlen, rl_size); - /* Do we have enough space? */ - if (rl_size <= ((*r1len * sizeof(ntfs_runlist) + PAGE_SIZE - 1) & - PAGE_MASK)) { - /* Have enough space already. */ - rl = *rl1; - ntfs_debug(DEBUG_OTHER, "%s(): Have enough space already.\n", - __FUNCTION__); - } else { - /* Need more space. Reallocate. */ - ntfs_debug(DEBUG_OTHER, "%s(): Need more space.\n", - __FUNCTION__); - rl = ntfs_vmalloc(rlen << sizeof(ntfs_runlist)); - if (!rl) - return -ENOMEM; - /* Copy over rl1. */ - ntfs_memcpy(rl, *rl1, *r1len * sizeof(ntfs_runlist)); - ntfs_vfree(*rl1); - *rl1 = rl; - } - /* Reuse rl_size as the current position index into rl. */ - rl_size = *r1len - 1; - ntfs_debug(DEBUG_OTHER, "%s(): rl_size = %i.\n", __FUNCTION__,rl_size); - /* Coalesce neighbouring elements, if present. 
*/ - rl2_pos = 0; - if (rl[rl_size].lcn + rl[rl_size].len == rl2[rl2_pos].lcn) { - ntfs_debug(DEBUG_OTHER, "%s(): Coalescing adjacent runs.\n", - __FUNCTION__); - ntfs_debug(DEBUG_OTHER, "%s(): Before: rl[rl_size].len = %i.\n", - __FUNCTION__, rl[rl_size].len); - rl[rl_size].len += rl2[rl2_pos].len; - ntfs_debug(DEBUG_OTHER, "%s(): After: rl[rl_size].len = %i.\n", - __FUNCTION__, rl[rl_size].len); - rl2_pos++; - r2len--; - rlen--; - } - rl_size++; - /* Copy over rl2. */ - ntfs_memcpy(rl + rl_size, rl2 + rl2_pos, r2len * sizeof(ntfs_runlist)); - rlen--; - rl[rlen].lcn = (ntfs_cluster_t)-1; - rl[rlen].len = (ntfs_cluster_t)0; - *r1len = rlen; - ntfs_debug(DEBUG_OTHER, "%s(): Dumping result runlist.\n", - __FUNCTION__); - dump_runlist(*rl1, *r1len); - ntfs_debug(DEBUG_OTHER, "%s(): Returning with *r1len = %i.\n", - __FUNCTION__, rlen); - return 0; + /* Bye, bye... */ + ntfs_destroy_extent_inode(ni); } /** - * ntfs_alloc_mft_record - allocate an mft record - * @vol: volume to allocate an mft record on - * @result: the mft record number allocated - * - * Allocate a new mft record on disk. Return 0 on success or -ERRNO on error. - * On success, *@result contains the allocated mft record number. On error, - * *@result is -1UL. - * - * Note, this function doesn't actually set the mft record to be in use. This - * is done by the caller, which at the moment is only ntfs_alloc_inode(). - * - * To find a free mft record, we scan the mft bitmap for a zero bit. To - * optimize this we start scanning at the place where we last stopped and we - * perform wrap around when we reach the end. Note, we do not try to allocate - * mft records below number 24 because numbers 0 to 15 are the defined system - * files anyway and 16 to 24 are special in that they are used for storing - * extension mft records for $MFT's $DATA attribute. This is required to avoid - * the possibility of creating a run list with a circular dependence which once - * written to disk can never be read in again. 
Windows will only use records - * 16 to 24 for normal files if the volume is completely out of space. We never - * use them which means that when the volume is really out of space we cannot - * create any more files while Windows can still create up to 8 small files. We - * can start doing this at some later time, doesn't matter much for now. - * - * When scanning the mft bitmap, we only search up to the last allocated mft - * record. If there are no free records left in the range 24 to number of - * allocated mft records, then we extend the mft data in order to create free - * mft records. We extend the allocated size of $MFT/$DATA by 16 records at a - * time or one cluster, if cluster size is above 16kiB. If there isn't - * sufficient space to do this, we try to extend by a single mft record or one - * cluster, if cluster size is above mft record size, but we only do this if - * there is enough free space, which we know from the values returned by the - * failed cluster allocation function when we tried to do the first allocation. - * - * No matter how many mft records we allocate, we initialize only the first - * allocated mft record (incrementing mft data size and initialized size) and - * return its number to the caller in @*result, unless there are less than 24 - * mft records, in which case we allocate and initialize mft records until we - * reach record 24 which we consider as the first free mft record for use by - * normal files. - * - * If during any stage we overflow the initialized data in the mft bitmap, we - * extend the initialized size (and data size) by 8 bytes, allocating another - * cluster if required. The bitmap data size has to be at least equal to the - * number of mft records in the mft, but it can be bigger, in which case the - * superflous bits are padded with zeroes. 
- * - * Thus, when we return successfully (return value 0), we will have: - * - initialized / extended the mft bitmap if necessary, - * - initialized / extended the mft data if necessary, - * - set the bit corresponding to the mft record being allocated in the - * mft bitmap, and we will - * - return the mft record number in @*result. - * - * On error (return value below zero), nothing will have changed. If we had - * changed anything before the error occured, we will have reverted back to - * the starting state before returning to the caller. Thus, except for bugs, - * we should always leave the volume in a consitents state when returning from - * this function. NOTE: Small exception to this is that we set the bit in the - * mft bitmap but we do not mark the mft record in use, which is inconsistent. - * However, the caller will immediately add the wanted attributes to the mft - * record, set it in use and write it out to disk, so there should be no - * problem. - * - * Note, this function cannot make use of most of the normal functions, like - * for example for attribute resizing, etc, because when the run list overflows - * the base mft record and an attribute list is used, it is very important - * that the extension mft records used to store the $DATA attribute of $MFT - * can be reached without having to read the information contained inside - * them, as this would make it impossible to find them in the first place - * after the volume is dismounted. $MFT/$BITMAP probably doesn't need to - * follow this rule because the bitmap is not essential for finding the mft - * records, but on the other hand, handling the bitmap in this special way - * would make life easier because otherwise there might be circular invocations - * of functions when reading the bitmap but if we are careful, we should be - * able to avoid all problems. 
+ * ntfs_clear_big_inode - clean up the ntfs specific part of an inode + * @vi: vfs inode pending annihilation * - * FIXME: Don't forget $MftMirr, though this probably belongs in - * ntfs_update_inode() (or even deeper). (AIA) + * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode() + * is called, which deallocates all memory belonging to the NTFS specific part + * of the inode and returns. * - * FIXME: Want finer grained locking. (AIA) + * If the MFT record is dirty, we commit it before doing anything else. */ -static int ntfs_alloc_mft_record(ntfs_volume *vol, unsigned long *result) +void ntfs_clear_big_inode(struct inode *vi) { - unsigned long nr_mft_records, buf_size, buf_pos, pass_start, pass_end; - unsigned long last_read_pos, mft_rec_size, bit, l; - ntfs_attribute *data, *bmp; - __u8 *buf, *byte, pass, b, have_allocated_mftbmp = 0; - int rlen, rl_size = 0, r2len, rl2_size, old_data_rlen, err = 0; - ntfs_runlist *rl, *rl2; - ntfs_cluster_t lcn = 0, old_data_len; - ntfs_io io; - __s64 ll, old_data_allocated, old_data_initialized, old_data_size; - - *result = -1UL; - /* Allocate a buffer and setup the io structure. */ - buf = (__u8*)__get_free_page(GFP_NOFS); - if (!buf) - return -ENOMEM; - lock_kernel(); - /* Get the $DATA and $BITMAP attributes of $MFT. */ - data = ntfs_find_attr(vol->mft_ino, vol->at_data, 0); - bmp = ntfs_find_attr(vol->mft_ino, vol->at_bitmap, 0); - if (!data || !bmp) { - err = -EINVAL; - goto err_ret; - } - /* Determine the number of allocated mft records in the mft. */ - pass_end = nr_mft_records = data->allocated >> - vol->mft_record_size_bits; - ntfs_debug(DEBUG_OTHER, "%s(): nr_mft_records = %lu.\n", __FUNCTION__, - nr_mft_records); - /* Make sure we don't overflow the bitmap. */ - l = bmp->initialized << 3; - if (l < nr_mft_records) - // FIXME: It might be a good idea to extend the bitmap instead. 
- pass_end = l; - pass = 1; - buf_pos = vol->mft_data_pos; - if (buf_pos >= pass_end) { - buf_pos = 24UL; - pass = 2; - } - pass_start = buf_pos; - rl = bmp->d.r.runlist; - rlen = bmp->d.r.len - 1; - lcn = rl[rlen].lcn + rl[rlen].len; - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - ntfs_debug(DEBUG_OTHER, "%s(): Starting bitmap search.\n", - __FUNCTION__); - ntfs_debug(DEBUG_OTHER, "%s(): pass = %i, pass_start = %lu, pass_end = " - "%lu.\n", __FUNCTION__, pass, pass_start, pass_end); - byte = NULL; // FIXME: For debugging only. - /* Loop until a free mft record is found. */ - io.size = (nr_mft_records >> 3) & ~PAGE_MASK; - for (;; io.size = PAGE_SIZE) { - io.param = buf; - io.do_read = 1; - last_read_pos = buf_pos >> 3; - ntfs_debug(DEBUG_OTHER, "%s(): Before: bmp->allocated = 0x%Lx, " - "bmp->size = 0x%Lx, bmp->initialized = " - "0x%Lx.\n", __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - err = ntfs_readwrite_attr(vol->mft_ino, bmp, last_read_pos, - &io); - if (err) - goto err_ret; - ntfs_debug(DEBUG_OTHER, "%s(): Read %lu bytes.\n", __FUNCTION__, - (unsigned long)io.size); - ntfs_debug(DEBUG_OTHER, "%s(): After: bmp->allocated = 0x%Lx, " - "bmp->size = 0x%Lx, bmp->initialized = " - "0x%Lx.\n", __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - if (!io.size) - goto pass_done; - buf_size = io.size << 3; - bit = buf_pos & 7UL; - buf_pos &= ~7UL; - ntfs_debug(DEBUG_OTHER, "%s(): Before loop: buf_size = %lu, " - "buf_pos = %lu, bit = %lu, *byte = 0x%x, b = " - "%u.\n", __FUNCTION__, buf_size, buf_pos, bit, - byte ? *byte : -1, b); - for (; bit < buf_size && bit + buf_pos < pass_end; - bit &= ~7UL, bit += 8UL) { - byte = buf + (bit >> 3); - if (*byte == 0xff) - continue; - b = ffz((unsigned long)*byte); - if (b < (__u8)8 && b >= (bit & 7UL)) { - bit = b + (bit & ~7UL) + buf_pos; - ntfs_debug(DEBUG_OTHER, "%s(): Found free rec " - "in for loop. 
bit = %lu\n", - __FUNCTION__, bit); - goto found_free_rec; - } - } - ntfs_debug(DEBUG_OTHER, "%s(): After loop: buf_size = %lu, " - "buf_pos = %lu, bit = %lu, *byte = 0x%x, b = " - "%u.\n", __FUNCTION__, buf_size, buf_pos, bit, - byte ? *byte : -1, b); - buf_pos += buf_size; - if (buf_pos < pass_end) - continue; -pass_done: /* Finished with the current pass. */ - ntfs_debug(DEBUG_OTHER, "%s(): At pass_done.\n", __FUNCTION__); - if (pass == 1) { - /* - * Now do pass 2, scanning the first part of the zone - * we omitted in pass 1. - */ - ntfs_debug(DEBUG_OTHER, "%s(): Done pass 1.\n", - __FUNCTION__); - ntfs_debug(DEBUG_OTHER, "%s(): Pass = 2.\n", - __FUNCTION__); - pass = 2; - pass_end = pass_start; - buf_pos = pass_start = 24UL; - ntfs_debug(DEBUG_OTHER, "%s(): pass = %i, pass_start = " - "%lu, pass_end = %lu.\n", __FUNCTION__, - pass, pass_start, pass_end); - continue; - } /* pass == 2 */ - /* No free records left. */ - if (bmp->initialized << 3 > nr_mft_records && - bmp->initialized > 3) { - /* - * The mft bitmap is already bigger but the space is - * not covered by mft records, this implies that the - * next records are all free, so we already have found - * a free record. - */ - bit = nr_mft_records; - if (bit < 24UL) - bit = 24UL; - ntfs_debug(DEBUG_OTHER, "%s(): Found free record bit " - "(#1) = 0x%lx.\n", __FUNCTION__, bit); - goto found_free_rec; - } - ntfs_debug(DEBUG_OTHER, "%s(): Done pass 2.\n", __FUNCTION__); - ntfs_debug(DEBUG_OTHER, "%s(): Before: bmp->allocated = 0x%Lx, " - "bmp->size = 0x%Lx, bmp->initialized = " - "0x%Lx.\n", __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - /* Need to extend the mft bitmap. */ - if (bmp->initialized + 8LL > bmp->allocated) { - ntfs_io io2; - - ntfs_debug(DEBUG_OTHER, "%s(): Initialized " - "> allocated.\n", __FUNCTION__); - /* Need to extend bitmap by one more cluster. 
*/ - rl = bmp->d.r.runlist; - rlen = bmp->d.r.len - 1; - lcn = rl[rlen].lcn + rl[rlen].len; - io2.fn_put = ntfs_put; - io2.fn_get = ntfs_get; - io2.param = &b; - io2.size = 1; - io2.do_read = 1; - err = ntfs_readwrite_attr(vol->bitmap, data, lcn >> 3, - &io2); - if (err) - goto err_ret; - ntfs_debug(DEBUG_OTHER, "%s(): Read %lu bytes.\n", - __FUNCTION__, (unsigned long)io2.size); - if (io2.size == 1 && b != 0xff) { - __u8 tb = 1 << (lcn & (ntfs_cluster_t)7); - if (!(b & tb)) { - /* Next cluster is free. Allocate it. */ - b |= tb; - io2.param = &b; - io2.do_read = 0; - err = ntfs_readwrite_attr(vol->bitmap, - data, lcn >> 3, &io2); - if (err || io.size != 1) { - if (!err) - err = -EIO; - goto err_ret; - } -append_mftbmp_simple: rl[rlen].len++; - have_allocated_mftbmp |= 1; - ntfs_debug(DEBUG_OTHER, "%s(): " - "Appending one cluster " - "to mftbmp.\n", - __FUNCTION__); - } - } - if (!have_allocated_mftbmp) { - /* Allocate a cluster from the DATA_ZONE. */ - ntfs_cluster_t lcn2 = lcn; - ntfs_cluster_t count = 1; - err = ntfs_allocate_clusters(vol, &lcn2, - &count, &rl2, &r2len, - DATA_ZONE); - if (err) - goto err_ret; - if (count != 1 || lcn2 <= 0) { - if (count > 0) { -rl2_dealloc_err_out: if (ntfs_deallocate_clusters( - vol, rl2, r2len)) - ntfs_error("%s(): " - "Cluster " - "deallocation in error " - "code path failed! You " - "should run chkdsk.\n", - __FUNCTION__); - } - ntfs_vfree(rl2); - if (!err) - err = -EINVAL; - goto err_ret; - } - if (lcn2 == lcn) { - ntfs_vfree(rl2); - goto append_mftbmp_simple; - } - /* We need to append a new run. */ - rl_size = (rlen * sizeof(ntfs_runlist) + - PAGE_SIZE - 1) & PAGE_MASK; - /* Reallocate memory if necessary. 
*/ - if ((rlen + 2) * sizeof(ntfs_runlist) >= - rl_size) { - ntfs_runlist *rlt; - - rl_size += PAGE_SIZE; - rlt = ntfs_vmalloc(rl_size); - if (!rlt) { - err = -ENOMEM; - goto rl2_dealloc_err_out; - } - ntfs_memcpy(rlt, rl, rl_size - - PAGE_SIZE); - ntfs_vfree(rl); - bmp->d.r.runlist = rl = rlt; - } - ntfs_vfree(rl2); - rl[rlen].lcn = lcn = lcn2; - rl[rlen].len = count; - bmp->d.r.len = ++rlen; - have_allocated_mftbmp |= 2; - ntfs_debug(DEBUG_OTHER, "%s(): Adding run to " - "mftbmp. LCN = %i, len = %i\n", - __FUNCTION__, lcn, count); - } - /* - * We now have extended the mft bitmap allocated size - * by one cluster. Reflect this in the attribute. - */ - bmp->allocated += (__s64)vol->cluster_size; - } - ntfs_debug(DEBUG_OTHER, "%s(): After: bmp->allocated = 0x%Lx, " - "bmp->size = 0x%Lx, bmp->initialized = " - "0x%Lx.\n", __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - /* We now have sufficient allocated space. */ - ntfs_debug(DEBUG_OTHER, "%s(): Now have sufficient allocated " - "space in mftbmp.\n", __FUNCTION__); - ntfs_debug(DEBUG_OTHER, "%s(): Before: bmp->allocated = 0x%Lx, " - "bmp->size = 0x%Lx, bmp->initialized = " - "0x%Lx.\n", __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - buf_pos = bmp->initialized; - bmp->initialized += 8LL; - if (bmp->initialized > bmp->size) - bmp->size = bmp->initialized; - ntfs_debug(DEBUG_OTHER, "%s(): After: bmp->allocated = 0x%Lx, " - "bmp->size = 0x%Lx, bmp->initialized = " - "0x%Lx.\n", __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - have_allocated_mftbmp |= 4; - /* Update the mft bitmap attribute value. 
*/ - memset(buf, 0, 8); - io.param = buf; - io.size = 8; - io.do_read = 0; - err = ntfs_readwrite_attr(vol->mft_ino, bmp, buf_pos, &io); - if (err || io.size != 8) { - if (!err) - err = -EIO; - goto shrink_mftbmp_err_ret; - } - ntfs_debug(DEBUG_OTHER, "%s(): Wrote extended mftbmp bytes " - "%lu.\n", __FUNCTION__, (unsigned long)io.size); - ntfs_debug(DEBUG_OTHER, "%s(): After write: bmp->allocated = " - "0x%Lx, bmp->size = 0x%Lx, bmp->initialized = " - "0x%Lx.\n", __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - bit = buf_pos << 3; - ntfs_debug(DEBUG_OTHER, "%s(): Found free record bit (#2) = " - "0x%lx.\n", __FUNCTION__, bit); - goto found_free_rec; - } -found_free_rec: - /* bit is the found free mft record. Allocate it in the mft bitmap. */ - vol->mft_data_pos = bit; - ntfs_debug(DEBUG_OTHER, "%s(): At found_free_rec.\n", __FUNCTION__); - io.param = buf; - io.size = 1; - io.do_read = 1; - ntfs_debug(DEBUG_OTHER, "%s(): Before update: bmp->allocated = 0x%Lx, " - "bmp->size = 0x%Lx, bmp->initialized = 0x%Lx.\n", - __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - err = ntfs_readwrite_attr(vol->mft_ino, bmp, bit >> 3, &io); - if (err || io.size != 1) { - if (!err) - err = -EIO; - goto shrink_mftbmp_err_ret; - } - ntfs_debug(DEBUG_OTHER, "%s(): Read %lu bytes.\n", __FUNCTION__, - (unsigned long)io.size); -#ifdef DEBUG - /* Check our bit is really zero! */ - if (*buf & (1 << (bit & 7))) - BUG(); -#endif - *buf |= 1 << (bit & 7); - io.param = buf; - io.do_read = 0; - err = ntfs_readwrite_attr(vol->mft_ino, bmp, bit >> 3, &io); - if (err || io.size != 1) { - if (!err) - err = -EIO; - goto shrink_mftbmp_err_ret; - } - ntfs_debug(DEBUG_OTHER, "%s(): Wrote %lu bytes.\n", __FUNCTION__, - (unsigned long)io.size); - ntfs_debug(DEBUG_OTHER, "%s(): After update: bmp->allocated = 0x%Lx, " - "bmp->size = 0x%Lx, bmp->initialized = 0x%Lx.\n", - __FUNCTION__, bmp->allocated, - bmp->size, bmp->initialized); - /* The mft bitmap is now uptodate. 
Deal with mft data attribute now. */ - ll = (__s64)(bit + 1) << vol->mft_record_size_bits; - if (ll <= data->initialized) { - /* The allocated record is already initialized. We are done! */ - ntfs_debug(DEBUG_OTHER, "%s(): Allocated mft record " - "already initialized!\n", __FUNCTION__); - goto done_ret; - } - ntfs_debug(DEBUG_OTHER, "%s(): Allocated mft record needs " - "to be initialized.\n", __FUNCTION__); - /* The mft record is outside the initialized data. */ - mft_rec_size = (unsigned long)vol->mft_record_size; - /* Preserve old values for undo purposes. */ - old_data_allocated = data->allocated; - old_data_rlen = data->d.r.len - 1; - old_data_len = data->d.r.runlist[old_data_rlen].len; - /* - * If necessary, extend the mft until it covers the allocated record. - * The loop is only actually used when a freshly formatted volume is - * first written to. But it optimizes away nicely in the common case. - */ - while (ll > data->allocated) { - ntfs_cluster_t lcn2, nr_lcn2, nr, min_nr; + ntfs_inode *ni = NTFS_I(vi); - ntfs_debug(DEBUG_OTHER, "%s(): Extending mft data allocation, " - "data->allocated = 0x%Lx, data->size = 0x%Lx, " - "data->initialized = 0x%Lx.\n", __FUNCTION__, - data->allocated, data->size, data->initialized); - /* Minimum allocation is one mft record worth of clusters. */ - if (mft_rec_size <= vol->cluster_size) - min_nr = (ntfs_cluster_t)1; - else - min_nr = mft_rec_size >> vol->cluster_size_bits; - ntfs_debug(DEBUG_OTHER, "%s(): min_nr = %i.\n", __FUNCTION__, - min_nr); - /* Allocate 16 mft records worth of clusters. */ - nr = mft_rec_size << 4 >> vol->cluster_size_bits; - if (!nr) - nr = (ntfs_cluster_t)1; - /* Determine the preferred allocation location. 
*/ - ntfs_debug(DEBUG_OTHER, "%s(): nr = %i.\n", __FUNCTION__, nr); - rl2 = data->d.r.runlist; - r2len = data->d.r.len; - lcn2 = rl2[r2len - 1].lcn + rl2[r2len - 1].len; - ntfs_debug(DEBUG_OTHER, "%s(): rl2[r2len - 1].lcn = %i, .len = " - "%i.\n", __FUNCTION__, rl2[r2len - 1].lcn, - rl2[r2len - 1].len); - ntfs_debug(DEBUG_OTHER, "%s(): lcn2 = %i, r2len = %i.\n", - __FUNCTION__, lcn2, r2len); -retry_mft_data_allocation: - nr_lcn2 = nr; - err = ntfs_allocate_clusters(vol, &lcn2, &nr_lcn2, &rl2, - &r2len, MFT_ZONE); -#ifdef DEBUG - if (!err && nr_lcn2 < min_nr) - /* Allocated less than minimum needed. Weird! */ - BUG(); -#endif - if (err) { - /* - * If there isn't enough space to do the wanted - * allocation, but there is enough space to do a - * minimal allocation, then try that, unless the wanted - * allocation was already the minimal allocation. - */ - if (err == -ENOSPC && nr > min_nr && - nr_lcn2 >= min_nr) { - nr = min_nr; - ntfs_debug(DEBUG_OTHER, "%s(): Retrying mft " - "data allocation, nr = min_nr " - "= %i.\n", __FUNCTION__, nr); - goto retry_mft_data_allocation; - } - goto undo_mftbmp_alloc_err_ret; - } - ntfs_debug(DEBUG_OTHER, "%s(): Allocated %i clusters starting " - "at LCN %i.\n", __FUNCTION__, nr_lcn2, lcn2); - ntfs_debug(DEBUG_OTHER, "%s(): Allocated runlist:\n", - __FUNCTION__); - dump_runlist(rl2, r2len); - /* Append rl2 to the mft data attribute's run list. */ - err = splice_runlists(&data->d.r.runlist, (int*)&data->d.r.len, - rl2, r2len); - if (err) { - ntfs_debug(DEBUG_OTHER, "%s(): splice_runlists failed " - "with error code %i.\n", __FUNCTION__, - -err); - goto undo_partial_data_alloc_err_ret; - } - /* Reflect the allocated clusters in the mft allocated data. 
*/ - data->allocated += nr_lcn2 << vol->cluster_size_bits; - ntfs_debug(DEBUG_OTHER, "%s(): After extending mft data " - "allocation, data->allocated = 0x%Lx, " - "data->size = 0x%Lx, data->initialized = " - "0x%Lx.\n", __FUNCTION__, data->allocated, - data->size, data->initialized); - } - /* Prepare a formatted (empty) mft record. */ - memset(buf, 0, mft_rec_size); - ntfs_fill_mft_header(buf, mft_rec_size, 0, 0, 0); - err = ntfs_insert_fixups(buf, mft_rec_size); - if (err) - goto undo_data_alloc_err_ret; - /* - * Extend mft data initialized size to reach the allocated mft record - * and write the formatted mft record buffer to each mft record being - * initialized. Note, that ntfs_readwrite_attr extends both - * data->initialized and data->size, so no need for us to touch them. - */ - old_data_initialized = data->initialized; - old_data_size = data->size; - while (ll > data->initialized) { - ntfs_debug(DEBUG_OTHER, "%s(): Initializing mft record " - "0x%Lx.\n", __FUNCTION__, - data->initialized >> vol->mft_record_size_bits); - io.param = buf; - io.size = mft_rec_size; - io.do_read = 0; - err = ntfs_readwrite_attr(vol->mft_ino, data, - data->initialized, &io); - if (err || io.size != mft_rec_size) { - if (!err) - err = -EIO; - goto undo_data_init_err_ret; - } - ntfs_debug(DEBUG_OTHER, "%s(): Wrote %i bytes to mft data.\n", - __FUNCTION__, io.size); - } - /* Update the VFS inode size as well. */ - VFS_I(vol->mft_ino)->i_size = data->size; -#ifdef DEBUG - ntfs_debug(DEBUG_OTHER, "%s(): After mft record " - "initialization: data->allocated = 0x%Lx, data->size " - "= 0x%Lx, data->initialized = 0x%Lx.\n", __FUNCTION__, - data->allocated, data->size, data->initialized); - /* Sanity checks. */ - if (data->size > data->allocated || data->size < data->initialized || - data->initialized > data->allocated) - BUG(); -#endif -done_ret: - /* Return the number of the allocated mft record. */ - ntfs_debug(DEBUG_OTHER, "%s(): At done_ret. 
*result = bit = 0x%lx.\n", - __FUNCTION__, bit); - *result = bit; - vol->mft_data_pos = bit + 1; -err_ret: - unlock_kernel(); - free_page((unsigned long)buf); - ntfs_debug(DEBUG_OTHER, "%s(): Syncing inode $MFT.\n", __FUNCTION__); - if (ntfs_update_inode(vol->mft_ino)) - ntfs_error("%s(): Failed to sync inode $MFT. " - "Continuing anyway.\n",__FUNCTION__); - if (!err) { - ntfs_debug(DEBUG_FILE3, "%s(): Done. Allocated mft record " - "number *result = 0x%lx.\n", __FUNCTION__, - *result); - return 0; + __ntfs_clear_inode(ni); + + if (NInoAttr(ni)) { + /* Release the base inode if we are holding it. */ + if (ni->nr_extents == -1) { + iput(VFS_I(ni->ext.base_ntfs_ino)); + ni->nr_extents = 0; + ni->ext.base_ntfs_ino = NULL; + } } - if (err != -ENOSPC) - ntfs_error("%s(): Failed to allocate an mft record. Returning " - "error code %i.\n", __FUNCTION__, -err); - else - ntfs_debug(DEBUG_FILE3, "%s(): Failed to allocate an mft " - "record due to lack of free space.\n", - __FUNCTION__); - return err; -undo_data_init_err_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At undo_data_init_err_ret.\n", - __FUNCTION__); - data->initialized = old_data_initialized; - data->size = old_data_size; -undo_data_alloc_err_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At undo_data_alloc_err_ret.\n", - __FUNCTION__); - data->allocated = old_data_allocated; -undo_partial_data_alloc_err_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At undo_partial_data_alloc_err_ret.\n", - __FUNCTION__); - /* Deallocate the clusters. */ - if (ntfs_deallocate_clusters(vol, rl2, r2len)) - ntfs_error("%s(): Error deallocating clusters in error code " - "path. You should run chkdsk.\n", __FUNCTION__); - ntfs_vfree(rl2); - /* Revert the run list back to what it was before. 
*/ - r2len = data->d.r.len; - rl2 = data->d.r.runlist; - rl2[old_data_rlen++].len = old_data_len; - rl2[old_data_rlen].lcn = (ntfs_cluster_t)-1; - rl2[old_data_rlen].len = (ntfs_cluster_t)0; - data->d.r.len = old_data_rlen; - rl2_size = ((old_data_rlen + 1) * sizeof(ntfs_runlist) + PAGE_SIZE - - 1) & PAGE_MASK; - /* Reallocate memory freeing any extra memory allocated. */ - if (rl2_size < ((r2len * sizeof(ntfs_runlist) + PAGE_SIZE - 1) & - PAGE_MASK)) { - rl2 = ntfs_vmalloc(rl2_size); - if (rl2) { - ntfs_memcpy(rl2, data->d.r.runlist, rl2_size); - ntfs_vfree(data->d.r.runlist); - data->d.r.runlist = rl2; - } else - ntfs_error("%s(): Error reallocating " - "memory in error code path. This " - "should be harmless.\n", __FUNCTION__); - } -undo_mftbmp_alloc_err_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At undo_mftbmp_alloc_err_ret.\n", - __FUNCTION__); - /* Deallocate the allocated bit in the mft bitmap. */ - io.param = buf; - io.size = 1; - io.do_read = 1; - err = ntfs_readwrite_attr(vol->mft_ino, bmp, bit >> 3, &io); - if (!err && io.size == 1) { - *buf &= ~(1 << (bit & 7)); - io.param = buf; - io.do_read = 0; - err = ntfs_readwrite_attr(vol->mft_ino, bmp, bit >> 3, &io); - } - if (err || io.size != 1) { - if (!err) - err = -EIO; - ntfs_error("%s(): Error deallocating mft record in error code " - "path. You should run chkdsk.\n", __FUNCTION__); - } -shrink_mftbmp_err_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At shrink_mftbmp_err_ret.\n", - __FUNCTION__); - ntfs_debug(DEBUG_OTHER, "%s(): have_allocated_mftbmp = %i.\n", - __FUNCTION__, have_allocated_mftbmp); - if (!have_allocated_mftbmp) - goto err_ret; - /* Shrink the mftbmp back to previous size. */ - if (bmp->size == bmp->initialized) - bmp->size -= 8LL; - bmp->initialized -= 8LL; - have_allocated_mftbmp &= ~4; - /* If no allocation occured then we are done. 
*/ - ntfs_debug(DEBUG_OTHER, "%s(): have_allocated_mftbmp = %i.\n", - __FUNCTION__, have_allocated_mftbmp); - if (!have_allocated_mftbmp) - goto err_ret; - /* Deallocate the allocated cluster. */ - bmp->allocated -= (__s64)vol->cluster_size; - if (ntfs_deallocate_cluster_run(vol, lcn, (ntfs_cluster_t)1)) - ntfs_error("%s(): Error deallocating cluster in error code " - "path. You should run chkdsk.\n", __FUNCTION__); - switch (have_allocated_mftbmp & 3) { - case 1: - /* Delete the last lcn from the last run of mftbmp. */ - rl[rlen - 1].len--; - break; - case 2: - /* Delete the last run of mftbmp. */ - bmp->d.r.len = --rlen; - /* Reallocate memory if necessary. */ - if ((rlen + 1) * sizeof(ntfs_runlist) <= rl_size - PAGE_SIZE) { - ntfs_runlist *rlt; - - rl_size -= PAGE_SIZE; - rlt = ntfs_vmalloc(rl_size); - if (rlt) { - ntfs_memcpy(rlt, rl, rl_size); - ntfs_vfree(rl); - bmp->d.r.runlist = rl = rlt; - } else - ntfs_error("%s(): Error " - "reallocating memory in error " - "code path. This should be " - "harmless.\n", __FUNCTION__); - } - bmp->d.r.runlist[bmp->d.r.len].lcn = (ntfs_cluster_t)-1; - bmp->d.r.runlist[bmp->d.r.len].len = (ntfs_cluster_t)0; - break; - default: + BUG_ON(ni->page); + if (!atomic_dec_and_test(&ni->count)) BUG(); + return; +} + +/** + * ntfs_show_options - show mount options in /proc/mounts + * @sf: seq_file in which to write our mount options + * @mnt: vfs mount whose mount options to display + * + * Called by the VFS once for each mounted ntfs volume when someone reads + * /proc/mounts in order to display the NTFS specific mount options of each + * mount. The mount options of the vfs mount @mnt are written to the seq file + * @sf and success is returned. 
+ */ +int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) +{ + ntfs_volume *vol = NTFS_SB(mnt->mnt_sb); + int i; + + seq_printf(sf, ",uid=%i", vol->uid); + seq_printf(sf, ",gid=%i", vol->gid); + if (vol->fmask == vol->dmask) + seq_printf(sf, ",umask=0%o", vol->fmask); + else { + seq_printf(sf, ",fmask=0%o", vol->fmask); + seq_printf(sf, ",dmask=0%o", vol->dmask); + } + seq_printf(sf, ",nls=%s", vol->nls_map->charset); + if (NVolCaseSensitive(vol)) + seq_printf(sf, ",case_sensitive"); + if (NVolShowSystemFiles(vol)) + seq_printf(sf, ",show_sys_files"); + for (i = 0; on_errors_arr[i].val; i++) { + if (on_errors_arr[i].val & vol->on_errors) + seq_printf(sf, ",errors=%s", on_errors_arr[i].str); } - goto err_ret; + seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier); + return 0; } -/* We need 0x48 bytes in total. */ -static int add_standard_information(ntfs_inode *ino) +#ifdef NTFS_RW + +/** + * ntfs_truncate - called when the i_size of an ntfs inode is changed + * @vi: inode for which the i_size was changed + * + * We don't support i_size changes yet. + * + * Called with ->i_sem held. 
+ */ +void ntfs_truncate(struct inode *vi) { - ntfs_time64_t now; - char data[0x30]; - char *position = data; - ntfs_attribute *si; - - now = ntfs_now(); - NTFS_PUTU64(position + 0x00, now); /* File creation */ - NTFS_PUTU64(position + 0x08, now); /* Last modification */ - NTFS_PUTU64(position + 0x10, now); /* Last mod for MFT */ - NTFS_PUTU64(position + 0x18, now); /* Last access */ - NTFS_PUTU64(position + 0x20, 0); /* MSDOS file perms */ - NTFS_PUTU64(position + 0x28, 0); /* unknown */ - return ntfs_create_attr(ino, ino->vol->at_standard_information, 0, - data, sizeof(data), &si); -} - -static int add_filename(ntfs_inode *ino, ntfs_inode *dir, - const unsigned char *filename, int length, ntfs_u32 flags) -{ - unsigned char *position; - unsigned int size; - ntfs_time64_t now; - int count, error; - unsigned char* data; - ntfs_attribute *fn; - - /* Work out the size. */ - size = 0x42 + 2 * length; - data = ntfs_malloc(size); - if (!data) - return -ENOMEM; - /* Search for a position. */ - position = data; - NTFS_PUTINUM(position, dir); /* Inode num of dir */ - now = ntfs_now(); - NTFS_PUTU64(position + 0x08, now); /* File creation */ - NTFS_PUTU64(position + 0x10, now); /* Last modification */ - NTFS_PUTU64(position + 0x18, now); /* Last mod for MFT */ - NTFS_PUTU64(position + 0x20, now); /* Last access */ - /* FIXME: Get the following two sizes by finding the data attribute - * in ino->attr and copying the corresponding fields from there. - * If no data present then set to zero. In current implementation - * add_data is called after add_filename so zero is correct on - * creation. Need to change when we have hard links / support different - * filename namespaces. (AIA) */ - NTFS_PUTS64(position + 0x28, 0); /* Allocated size */ - NTFS_PUTS64(position + 0x30, 0); /* Data size */ - NTFS_PUTU32(position + 0x38, flags); /* File flags */ - NTFS_PUTU32(position + 0x3c, 0); /* We don't use these - * features yet. 
*/ - NTFS_PUTU8(position + 0x40, length); /* Filename length */ - NTFS_PUTU8(position + 0x41, 0); /* Only long name */ - /* FIXME: This is madness. We are defining the POSIX namespace - * for the filename here which can mean that the file will be - * invisible when in Windows NT/2k! )-: (AIA) */ - position += 0x42; - for (count = 0; count < length; count++) { - NTFS_PUTU16(position + 2 * count, filename[count]); - } - error = ntfs_create_attr(ino, ino->vol->at_file_name, 0, data, size, - &fn); - if (!error) - error = ntfs_dir_add(dir, ino, fn); - ntfs_free(data); - return error; -} - -int add_security(ntfs_inode* ino, ntfs_inode* dir) -{ - int error; - char *buf; - int size; - ntfs_attribute* attr; - ntfs_io io; - ntfs_attribute *se; - - attr = ntfs_find_attr(dir, ino->vol->at_security_descriptor, 0); - if (!attr) - return -EOPNOTSUPP; /* Need security in directory. */ - size = attr->size; - if (size > 512) - return -EOPNOTSUPP; - buf = ntfs_malloc(size); - if (!buf) - return -ENOMEM; - io.fn_get = ntfs_get; - io.fn_put = ntfs_put; - io.param = buf; - io.size = size; - error = ntfs_read_attr(dir, ino->vol->at_security_descriptor, 0, 0,&io); - if (!error && io.size != size) - ntfs_error("wrong size in add_security\n"); - if (error) { - ntfs_free(buf); - return error; - } - /* FIXME: Consider ACL inheritance. */ - error = ntfs_create_attr(ino, ino->vol->at_security_descriptor, - 0, buf, size, &se); - ntfs_free(buf); - return error; -} - -static int add_data(ntfs_inode* ino, unsigned char *data, int length) -{ - ntfs_attribute *da; - - return ntfs_create_attr(ino, ino->vol->at_data, 0, data, length, &da); -} - -/* - * We _could_ use 'dir' to help optimise inode allocation. - * - * FIXME: Need to undo what we do in ntfs_alloc_mft_record if we get an error - * further on in ntfs_alloc_inode. Either fold the two functions to allow - * proper undo or just deallocate the record from the mft bitmap. (AIA) + // TODO: Implement... 
+ ntfs_warning(vi->i_sb, "Eeek: i_size may have changed! If you see " + "this right after a message from " + "ntfs_{prepare,commit}_{,nonresident_}write() then " + "just ignore it. Otherwise it is bad news."); + // TODO: reset i_size now! + return; +} + +/** + * ntfs_setattr - called from notify_change() when an attribute is being changed + * @dentry: dentry whose attributes to change + * @attr: structure describing the attributes and the changes + * + * We have to trap VFS attempts to truncate the file described by @dentry as + * soon as possible, because we do not implement changes in i_size yet. So we + * abort all i_size changes here. + * + * Called with ->i_sem held. + * + * Basically this is a copy of generic notify_change() and inode_setattr() + * functionality, except we intercept and abort changes in i_size. */ -int ntfs_alloc_inode(ntfs_inode *dir, ntfs_inode *result, const char *filename, - int namelen, ntfs_u32 flags) +int ntfs_setattr(struct dentry *dentry, struct iattr *attr) { - ntfs_volume *vol = dir->vol; + struct inode *vi; int err; - ntfs_u8 buffer[2]; - ntfs_io io; + unsigned int ia_valid = attr->ia_valid; - err = ntfs_alloc_mft_record(vol, &(result->i_number)); - if (err) { - if (err == -ENOSPC) - ntfs_error("%s(): No free inodes.\n", __FUNCTION__); - return err; - } - /* Get the sequence number. */ - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - io.param = buffer; - io.size = 2; - err = ntfs_read_attr(vol->mft_ino, vol->at_data, 0, - ((__s64)result->i_number << vol->mft_record_size_bits) - + 0x10, &io); - // FIXME: We are leaving the MFT in inconsistent state! (AIA) + vi = dentry->d_inode; + + err = inode_change_ok(vi, attr); if (err) return err; - /* Increment the sequence number skipping zero. 
*/ - result->sequence_number = (NTFS_GETU16(buffer) + 1) & 0xffff; - if (!result->sequence_number) - result->sequence_number++; - result->vol = vol; - result->attr_count = 0; - result->attrs = 0; - result->record_count = 1; - result->records = ntfs_calloc(8 * sizeof(int)); - if (!result->records) - goto mem_err_out; - result->records[0] = result->i_number; - result->attr = ntfs_calloc(vol->mft_record_size); - if (!result->attr) { - ntfs_free(result->records); - result->records = NULL; - goto mem_err_out; - } - ntfs_fill_mft_header(result->attr, vol->mft_record_size, - result->sequence_number, 1, 1); - err = add_standard_information(result); - if (!err) - err = add_filename(result, dir, filename, namelen, flags); - if (!err) - err = add_security(result, dir); - // FIXME: We are leaving the MFT in inconsistent state on error! (AIA) - return err; -mem_err_out: - // FIXME: We are leaving the MFT in inconsistent state! (AIA) - result->record_count = 0; - result->attr = NULL; - return -ENOMEM; -} -int ntfs_alloc_file(ntfs_inode *dir, ntfs_inode *result, char *filename, - int namelen) -{ - int err; + if ((ia_valid & ATTR_UID && attr->ia_uid != vi->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != vi->i_gid)) { + err = DQUOT_TRANSFER(vi, attr) ? -EDQUOT : 0; + if (err) + return err; + } + + lock_kernel(); + + if (ia_valid & ATTR_SIZE) { + ntfs_error(vi->i_sb, "Changes in i_size are not supported " + "yet. Sorry."); + // TODO: Implement... 
+ // err = vmtruncate(vi, attr->ia_size); + err = -EOPNOTSUPP; + if (err) + goto trunc_err; + } + + if (ia_valid & ATTR_UID) + vi->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + vi->i_gid = attr->ia_gid; + if (ia_valid & ATTR_ATIME) + vi->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + vi->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + vi->i_ctime = attr->ia_ctime; + if (ia_valid & ATTR_MODE) { + vi->i_mode = attr->ia_mode; + if (!in_group_p(vi->i_gid) && + !capable(CAP_FSETID)) + vi->i_mode &= ~S_ISGID; + } + mark_inode_dirty(vi); + +trunc_err: + + unlock_kernel(); - err = ntfs_alloc_inode(dir, result, filename, namelen, 0); - if (!err) - err = add_data(result, 0, 0); return err; } +#endif + diff -urN linux-2.4.24-vanilla/fs/ntfs/inode.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/inode.h --- linux-2.4.24-vanilla/fs/ntfs/inode.h 2001-09-08 20:24:40.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/inode.h 2004-01-21 14:31:43.000000000 +0000 @@ -1,58 +1,278 @@ /* - * inode.h - Header file for inode.c + * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of + * the Linux-NTFS project. * - * Copyright (C) 1997 Régis Duchesne - * Copyright (C) 1998 Martin von Löwis - * Copyright (c) 2001 Anton Altparmakov (AIA) + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_INODE_H +#define _LINUX_NTFS_INODE_H + +#include + +#include "layout.h" +#include "volume.h" + +typedef struct _ntfs_inode ntfs_inode; + +/* + * The NTFS in-memory inode structure. It is just used as an extension to the + * fields already provided in the VFS inode. + */ + +/* + * FIXME: WARNING: If you change this structure you must also update + * include/linux/ntfs_fs_i.h or ntfs will blow up!!! + */ +struct _ntfs_inode { + s64 initialized_size; /* Copy from the attribute record. */ + s64 allocated_size; /* Copy from the attribute record. */ + unsigned long state; /* NTFS specific flags describing this inode. + See ntfs_inode_state_bits below. */ + unsigned long mft_no; /* Number of the mft record / inode. */ + u16 seq_no; /* Sequence number of the mft record. */ + atomic_t count; /* Inode reference count for book keeping. */ + ntfs_volume *vol; /* Pointer to the ntfs volume of this inode. */ + /* + * If NInoAttr() is true, the below fields describe the attribute which + * this fake inode belongs to. The actual inode of this attribute is + * pointed to by base_ntfs_ino and nr_extents is always set to -1 (see + * below). For real inodes, we also set the type (AT_DATA for files and + * AT_INDEX_ALLOCATION for directories), with the name = NULL and + * name_len = 0 for files and name = I30 (global constant) and + * name_len = 4 for directories. + */ + ATTR_TYPES type; /* Attribute type of this fake inode. */ + uchar_t *name; /* Attribute name of this fake inode. */ + u32 name_len; /* Attribute name length of this fake inode. 
*/ + run_list run_list; /* If state has the NI_NonResident bit set, + the run list of the unnamed data attribute + (if a file) or of the index allocation + attribute (directory) or of the attribute + described by the fake inode (if NInoAttr()). + If run_list.rl is NULL, the run list has not + been read in yet or has been unmapped. If + NI_NonResident is clear, the attribute is + resident (file and fake inode) or there is + no $I30 index allocation attribute + (small directory). In the latter case + run_list.rl is always NULL.*/ + /* + * The following fields are only valid for real inodes and extent + * inodes. + */ + struct semaphore mrec_lock; /* Lock for serializing access to the + mft record belonging to this inode. */ + struct page *page; /* The page containing the mft record of the + inode. This should only be touched by the + (un)map_mft_record*() functions. */ + int page_ofs; /* Offset into the page at which the mft record + begins. This should only be touched by the + (un)map_mft_record*() functions. */ + /* + * Attribute list support (only for use by the attribute lookup + * functions). Setup during read_inode for all inodes with attribute + * lists. Only valid if NI_AttrList is set in state, and attr_list_rl is + * further only valid if NI_AttrListNonResident is set. + */ + u32 attr_list_size; /* Length of attribute list value in bytes. */ + u8 *attr_list; /* Attribute list value itself. */ + run_list attr_list_rl; /* Run list for the attribute list value. */ + union { + struct { /* It is a directory or $MFT. */ + struct inode *bmp_ino; /* Attribute inode for the + directory index $BITMAP. */ + u32 block_size; /* Size of an index block. */ + u32 vcn_size; /* Size of a vcn in this + directory index. */ + u8 block_size_bits; /* Log2 of the above. */ + u8 vcn_size_bits; /* Log2 of the above. */ + } index; + struct { /* It is a compressed file or fake inode. */ + s64 size; /* Copy of compressed_size from + $DATA. 
*/ + u32 block_size; /* Size of a compression block + (cb). */ + u8 block_size_bits; /* Log2 of the size of a cb. */ + u8 block_clusters; /* Number of clusters per cb. */ + } compressed; + } itype; + struct semaphore extent_lock; /* Lock for accessing/modifying the + below . */ + s32 nr_extents; /* For a base mft record, the number of attached extent + inodes (0 if none), for extent records and for fake + inodes describing an attribute this is -1. */ + union { /* This union is only used if nr_extents != 0. */ + ntfs_inode **extent_ntfs_inos; /* For nr_extents > 0, array of + the ntfs inodes of the extent + mft records belonging to + this base inode which have + been loaded. */ + ntfs_inode *base_ntfs_ino; /* For nr_extents == -1, the + ntfs inode of the base mft + record. For fake inodes, the + real (base) inode to which + the attribute belongs. */ + } ext; +}; + +/* + * Defined bits for the state field in the ntfs_inode structure. + * (f) = files only, (d) = directories only, (a) = attributes/fake inodes only + */ +typedef enum { + NI_Dirty, /* 1: Mft record needs to be written to disk. */ + NI_AttrList, /* 1: Mft record contains an attribute list. */ + NI_AttrListNonResident, /* 1: Attribute list is non-resident. Implies + NI_AttrList is set. */ + + NI_Attr, /* 1: Fake inode for attribute i/o. + 0: Real inode or extent inode. */ + + NI_MstProtected, /* 1: Attribute is protected by MST fixups. + 0: Attribute is not protected by fixups. */ + NI_NonResident, /* 1: Unnamed data attr is non-resident (f). + 1: Attribute is non-resident (a). */ + NI_IndexAllocPresent = NI_NonResident, /* 1: $I30 index alloc attr is + present (d). */ + NI_Compressed, /* 1: Unnamed data attr is compressed (f). + 1: Create compressed files by default (d). + 1: Attribute is compressed (a). */ + NI_Encrypted, /* 1: Unnamed data attr is encrypted (f). + 1: Create encrypted files by default (d). + 1: Attribute is encrypted (a). */ + NI_Sparse, /* 1: Unnamed data attr is sparse (f). 
+ 1: Create sparse files by default (d). + 1: Attribute is sparse (a). */ + NI_New, /* 1: The inode has just been allocated. */ +} ntfs_inode_state_bits; + +/* + * NOTE: We should be adding dirty mft records to a list somewhere and they + * should be independent of the (ntfs/vfs) inode structure so that an inode can + * be removed but the record can be left dirty for syncing later. */ -ntfs_attribute *ntfs_find_attr(ntfs_inode *ino, int type, char *name); +/* + * Macro tricks to expand the NInoFoo(), NInoSetFoo(), and NInoClearFoo() + * functions. + */ +#define NINO_FNS(flag) \ +static inline int NIno##flag(ntfs_inode *ni) \ +{ \ + return test_bit(NI_##flag, &(ni)->state); \ +} \ +static inline void NInoSet##flag(ntfs_inode *ni) \ +{ \ + set_bit(NI_##flag, &(ni)->state); \ +} \ +static inline void NInoClear##flag(ntfs_inode *ni) \ +{ \ + clear_bit(NI_##flag, &(ni)->state); \ +} -int ntfs_read_attr(ntfs_inode *ino, int type, char *name, __s64 offset, - ntfs_io *buf); +/* Emit the ntfs inode bitops functions. */ +NINO_FNS(Dirty) +NINO_FNS(AttrList) +NINO_FNS(AttrListNonResident) +NINO_FNS(Attr) +NINO_FNS(MstProtected) +NINO_FNS(NonResident) +NINO_FNS(IndexAllocPresent) +NINO_FNS(Compressed) +NINO_FNS(Encrypted) +NINO_FNS(Sparse) -int ntfs_write_attr(ntfs_inode *ino, int type, char *name, __s64 offset, - ntfs_io *buf); +/* + * The full structure containing a ntfs_inode and a vfs struct inode. Used for + * all real and fake inodes but not for extent inodes which lack the vfs struct + * inode. + */ +typedef struct { + ntfs_inode ntfs_inode; + struct inode vfs_inode; /* The vfs inode structure. */ +} big_ntfs_inode; + +/** + * NTFS_I - return the ntfs inode given a vfs inode + * @inode: VFS inode + * + * NTFS_I() returns the ntfs inode associated with the VFS @inode. 
+ */ +static inline ntfs_inode *NTFS_I(struct inode *inode) +{ + return (ntfs_inode *)&inode->u.ntfs_i; +} -int ntfs_init_inode(ntfs_inode *ino, ntfs_volume *vol, int inum); +static inline struct inode *VFS_I(ntfs_inode *ni) +{ + return (struct inode *)((u8*)ni - + ((u8 *)&(((struct inode *)NULL)->u.ntfs_i) - (u8 *)NULL)); +} -void ntfs_clear_inode(ntfs_inode *ino); +/** + * ntfs_attr - ntfs in memory attribute structure + * @mft_no: mft record number of the base mft record of this attribute + * @name: Unicode name of the attribute (NULL if unnamed) + * @name_len: length of @name in Unicode characters (0 if unnamed) + * @type: attribute type (see layout.h) + * + * This structure exists only to provide a small structure for the + * ntfs_{attr_}iget()/ntfs_test_inode()/ntfs_init_locked_inode() mechanism. + * + * NOTE: Elements are ordered by size to make the structure as compact as + * possible on all architectures. + */ +typedef struct { + unsigned long mft_no; + uchar_t *name; + u32 name_len; + ATTR_TYPES type; + struct inode *base_vi; +} ntfs_attr; -int ntfs_check_mft_record(ntfs_volume *vol, char *record); +extern struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no); +extern struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPES type, + uchar_t *name, u32 name_len); -int ntfs_alloc_inode(ntfs_inode *dir, ntfs_inode *result, const char *filename, - int namelen, ntfs_u32); +extern void ntfs_clear_big_inode(struct inode *vi); -int ntfs_alloc_file(ntfs_inode *dir, ntfs_inode *result, char *filename, - int namelen); +extern ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, + unsigned long mft_no); +extern void ntfs_clear_extent_inode(ntfs_inode *ni); -int ntfs_update_inode(ntfs_inode *ino); +extern void ntfs_read_locked_inode(struct inode *vi, ntfs_attr *na); +extern void ntfs_read_inode_mount(struct inode *vi); -int ntfs_vcn_to_lcn(ntfs_inode *ino, int vcn); +extern void ntfs_dirty_inode(struct inode *vi); -int 
ntfs_readwrite_attr(ntfs_inode *ino, ntfs_attribute *attr, __s64 offset, - ntfs_io *dest); +extern void ntfs_put_inode(struct inode *vi); -int ntfs_allocate_attr_number(ntfs_inode *ino, int *result); +extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt); -int ntfs_decompress_run(unsigned char **data, int *length, - ntfs_cluster_t *cluster, int *ctype); +#ifdef NTFS_RW -void ntfs_decompress(unsigned char *dest, unsigned char *src, ntfs_size_t l); +extern void ntfs_truncate(struct inode *vi); -int splice_runlists(ntfs_runlist **rl1, int *r1len, const ntfs_runlist *rl2, - int r2len); +extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); -/* - * NOTE: Neither of the ntfs_*_bit functions are atomic! But we don't need - * them atomic at present as we never operate on shared/cached bitmaps. - */ -static __inline__ int ntfs_test_and_set_bit(unsigned char *byte, const int bit) -{ - unsigned char *ptr = byte + (bit >> 3); - int b = 1 << (bit & 7); - int oldbit = *ptr & b ? 1 : 0; - *ptr |= b; - return oldbit; -} +#endif + +#endif /* _LINUX_NTFS_INODE_H */ diff -urN linux-2.4.24-vanilla/fs/ntfs/kcompat.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/kcompat.h --- linux-2.4.24-vanilla/fs/ntfs/kcompat.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/kcompat.h 2004-01-21 14:31:43.000000000 +0000 @@ -0,0 +1,86 @@ +/* + * kcompat.h - Various defines needed to easier sync with the 2.5.x version. + * Part of the Linux-NTFS project. Ported from the misc part of + * the 2.5.x kernel. + * + * Copyright (c) 2002 Pawel Kot. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_KCOMPAT_H +#define _LINUX_NTFS_KCOMPAT_H + +#include +#include + +#ifndef MAX_BUF_PER_PAGE +# define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) +#endif + +#define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while(0) + +/* Page cache limit. The filesystems should put that into their s_maxbytes + limits, otherwise bad things can happen in VM. */ +#if BITS_PER_LONG==32 +# define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#elif BITS_PER_LONG==64 +# define MAX_LFS_FILESIZE 0x7fffffffffffffff +#endif + +#define PageUptodate(page) Page_Uptodate(page) +#define page_buffers(page) (page)->buffers +#define page_has_buffers(page) ((page)->buffers != NULL) + +#include + +/* + * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() + * functions. 
+ */ +#define BUFFER_FNS(bit, name) \ +static inline void set_buffer_##name(struct buffer_head *bh) \ +{ \ + set_bit(BH_##bit, &(bh)->b_state); \ +} \ +static inline void clear_buffer_##name(struct buffer_head *bh) \ +{ \ + clear_bit(BH_##bit, &(bh)->b_state); \ +} + +/* + * test_set_buffer_foo(), test_clear_buffer_foo() + */ +#define TAS_BUFFER_FNS(bit, name) \ +static inline int test_set_buffer_##name(struct buffer_head *bh) \ +{ \ + return test_and_set_bit(BH_##bit, &(bh)->b_state); \ +} \ +static inline int test_clear_buffer_##name(struct buffer_head *bh) \ +{ \ + return test_and_clear_bit(BH_##bit, &(bh)->b_state); \ +} + +BUFFER_FNS(Uptodate, uptodate) +BUFFER_FNS(Dirty, dirty) +TAS_BUFFER_FNS(Dirty, dirty) +BUFFER_FNS(Lock, locked) +TAS_BUFFER_FNS(Lock, locked) +BUFFER_FNS(Mapped, mapped) +BUFFER_FNS(New, new) +BUFFER_FNS(Async, async) + +#endif /* _LINUX_NTFS_KCOMPAT_H */ diff -urN linux-2.4.24-vanilla/fs/ntfs/layout.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/layout.h --- linux-2.4.24-vanilla/fs/ntfs/layout.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/layout.h 2004-01-21 14:31:43.000000000 +0000 @@ -0,0 +1,2258 @@ +/* + * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS + * project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_LAYOUT_H +#define _LINUX_NTFS_LAYOUT_H + +#include +#include +#include +#include + +#include "types.h" + +/* + * Constant endianness conversion defines. + */ +#define const_le16_to_cpu(x) __constant_le16_to_cpu(x) +#define const_le32_to_cpu(x) __constant_le32_to_cpu(x) +#define const_le64_to_cpu(x) __constant_le64_to_cpu(x) + +#define const_cpu_to_le16(x) __constant_cpu_to_le16(x) +#define const_cpu_to_le32(x) __constant_cpu_to_le32(x) +#define const_cpu_to_le64(x) __constant_cpu_to_le64(x) + +/* The NTFS oem_id "NTFS "*/ +#define magicNTFS const_cpu_to_le64(0x202020205346544eULL) + +/* + * Location of bootsector on partition: + * The standard NTFS_BOOT_SECTOR is on sector 0 of the partition. + * On NT4 and above there is one backup copy of the boot sector to + * be found on the last sector of the partition (not normally accessible + * from within Windows as the bootsector contained number of sectors + * value is one less than the actual value!). + * On versions of NT 3.51 and earlier, the backup copy was located at + * number of sectors/2 (integer divide), i.e. in the middle of the volume. + */ + +/* + * BIOS parameter block (bpb) structure. + */ +typedef struct { + u16 bytes_per_sector; /* Size of a sector in bytes. */ + u8 sectors_per_cluster; /* Size of a cluster in sectors. 
*/ + u16 reserved_sectors; /* zero */ + u8 fats; /* zero */ + u16 root_entries; /* zero */ + u16 sectors; /* zero */ + u8 media_type; /* 0xf8 = hard disk */ + u16 sectors_per_fat; /* zero */ + u16 sectors_per_track; /* irrelevant */ + u16 heads; /* irrelevant */ + u32 hidden_sectors; /* zero */ + u32 large_sectors; /* zero */ +} __attribute__ ((__packed__)) BIOS_PARAMETER_BLOCK; + +/* + * NTFS boot sector structure. + */ +typedef struct { + u8 jump[3]; /* Irrelevant (jump to boot up code).*/ + u64 oem_id; /* Magic "NTFS ". */ + BIOS_PARAMETER_BLOCK bpb; /* See BIOS_PARAMETER_BLOCK. */ + u8 unused[4]; /* zero, NTFS diskedit.exe states that + this is actually: + __u8 physical_drive; // 0x80 + __u8 current_head; // zero + __u8 extended_boot_signature; + // 0x80 + __u8 unused; // zero + */ +/*0x28*/s64 number_of_sectors; /* Number of sectors in volume. Gives + maximum volume size of 2^63 sectors. + Assuming standard sector size of 512 + bytes, the maximum byte size is + approx. 4.7x10^21 bytes. (-; */ + s64 mft_lcn; /* Cluster location of mft data. */ + s64 mftmirr_lcn; /* Cluster location of copy of mft. */ + s8 clusters_per_mft_record; /* Mft record size in clusters. */ + u8 reserved0[3]; /* zero */ + s8 clusters_per_index_record; /* Index block size in clusters. */ + u8 reserved1[3]; /* zero */ + u64 volume_serial_number; /* Irrelevant (serial number). */ + u32 checksum; /* Boot sector checksum. */ +/*0x54*/u8 bootstrap[426]; /* Irrelevant (boot up code). */ + u16 end_of_sector_marker; /* End of bootsector magic. Always is + 0xaa55 in little endian. */ +/* sizeof() = 512 (0x200) bytes */ +} __attribute__ ((__packed__)) NTFS_BOOT_SECTOR; + +/* + * Magic identifiers present at the beginning of all ntfs record containing + * records (like mft records for example). + */ +typedef enum { + magic_BAAD = const_cpu_to_le32(0x44414142), /* BAAD == corrupt record */ + magic_CHKD = const_cpu_to_le32(0x444b4843), /* CHKD == chkdsk ???
*/ + magic_FILE = const_cpu_to_le32(0x454c4946), /* FILE == mft entry */ + magic_HOLE = const_cpu_to_le32(0x454c4f48), /* HOLE == ? (NTFS 3.0+?) */ + magic_INDX = const_cpu_to_le32(0x58444e49), /* INDX == index buffer */ +} NTFS_RECORD_TYPES; + +/* + * Generic magic comparison macros. Finally found a use for the ## preprocessor + * operator! (-8 + */ +#define is_magic(x, m) ( (u32)(x) == magic_##m ) +#define is_magicp(p, m) ( *(u32*)(p) == magic_##m ) + +/* + * Specialised magic comparison macros. + */ +#define is_baad_record(x) ( is_magic (x, BAAD) ) +#define is_baad_recordp(p) ( is_magicp(p, BAAD) ) +#define is_chkd_record(x) ( is_magic (x, CHKD) ) +#define is_chkd_recordp(p) ( is_magicp(p, CHKD) ) +#define is_file_record(x) ( is_magic (x, FILE) ) +#define is_file_recordp(p) ( is_magicp(p, FILE) ) +#define is_hole_record(x) ( is_magic (x, HOLE) ) +#define is_hole_recordp(p) ( is_magicp(p, HOLE) ) +#define is_indx_record(x) ( is_magic (x, INDX) ) +#define is_indx_recordp(p) ( is_magicp(p, INDX) ) + +#define is_mft_record(x) ( is_file_record(x) ) +#define is_mft_recordp(p) ( is_file_recordp(p) ) + +/* + * The Update Sequence Array (usa) is an array of the u16 values which belong + * to the end of each sector protected by the update sequence record in which + * this array is contained. Note that the first entry is the Update Sequence + * Number (usn), a cyclic counter of how many times the protected record has + * been written to disk. The values 0 and -1 (ie. 0xffff) are not used. All + * last u16's of each sector have to be equal to the usn (during reading) or + * are set to it (during writing). If they are not, an incomplete multi sector + * transfer has occurred when the data was written. + * The maximum size for the update sequence array is fixed to: + * maximum size = usa_ofs + (usa_count * 2) = 510 bytes + * The 510 bytes comes from the fact that the last u16 in the array has to + * (obviously) finish before the last u16 of the first 512-byte sector.
+ * This formula can be used as a consistency check in that usa_ofs + + * (usa_count * 2) has to be less than or equal to 510. + */ +typedef struct { + NTFS_RECORD_TYPES magic; /* A four-byte magic identifying the + record type and/or status. */ + u16 usa_ofs; /* Offset to the Update Sequence Array (usa) + from the start of the ntfs record. */ + u16 usa_count; /* Number of u16 sized entries in the usa + including the Update Sequence Number (usn), + thus the number of fixups is the usa_count + minus 1. */ +} __attribute__ ((__packed__)) NTFS_RECORD; + +/* + * System files mft record numbers. All these files are always marked as used + * in the bitmap attribute of the mft; presumably in order to avoid accidental + * allocation for random other mft records. Also, the sequence number for each + * of the system files is always equal to their mft record number and it is + * never modified. + */ +typedef enum { + FILE_MFT = 0, /* Master file table (mft). Data attribute + contains the entries and bitmap attribute + records which ones are in use (bit==1). */ + FILE_MFTMirr = 1, /* Mft mirror: copy of first four mft records + in data attribute. If cluster size > 4kiB, + copy of first N mft records, with + N = cluster_size / mft_record_size. */ + FILE_LogFile = 2, /* Journalling log in data attribute. */ + FILE_Volume = 3, /* Volume name attribute and volume information + attribute (flags and ntfs version). Windows + refers to this file as volume DASD (Direct + Access Storage Device). */ + FILE_AttrDef = 4, /* Array of attribute definitions in data + attribute. */ + FILE_root = 5, /* Root directory. */ + FILE_Bitmap = 6, /* Allocation bitmap of all clusters (lcns) in + data attribute. */ + FILE_Boot = 7, /* Boot sector (always at cluster 0) in data + attribute. */ + FILE_BadClus = 8, /* Contains all bad clusters in the non-resident + data attribute. */ + FILE_Secure = 9, /* Shared security descriptors in data attribute + and two indexes into the descriptors. 
+ Appeared in Windows 2000. Before that, this + file was named $Quota but was unused. */ + FILE_UpCase = 10, /* Uppercase equivalents of all 65536 Unicode + characters in data attribute. */ + FILE_Extend = 11, /* Directory containing other system files (eg. + $ObjId, $Quota, $Reparse and $UsnJrnl). This + is new to NTFS3.0. */ + FILE_reserved12 = 12, /* Reserved for future use (records 12-15). */ + FILE_reserved13 = 13, + FILE_reserved14 = 14, + FILE_reserved15 = 15, + FILE_first_user = 16, /* First user file, used as test limit for + whether to allow opening a file or not. */ +} NTFS_SYSTEM_FILES; + +/* + * These are the so far known MFT_RECORD_* flags (16-bit) which contain + * information about the mft record in which they are present. + */ +typedef enum { + MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001), + MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002), + MFT_REC_SPACE_FILLER = 0xffff /* Just to make flags 16-bit. */ +} __attribute__ ((__packed__)) MFT_RECORD_FLAGS; + +/* + * mft references (aka file references or file record segment references) are + * used whenever a structure needs to refer to a record in the mft. + * + * A reference consists of a 48-bit index into the mft and a 16-bit sequence + * number used to detect stale references. + * + * For error reporting purposes we treat the 48-bit index as a signed quantity. + * + * The sequence number is a circular counter (skipping 0) describing how many + * times the referenced mft record has been (re)used. This has to match the + * sequence number of the mft record being referenced, otherwise the reference + * is considered stale and removed (FIXME: only ntfsck or the driver itself?). + * + * If the sequence number is zero it is assumed that no sequence number + * consistency checking should be performed. + * + * FIXME: Since inodes are 32-bit as of now, the driver needs to always check + * for high_part being 0 and if not either BUG(), cause a panic() or handle + * the situation in some other way. 
This shouldn't be a problem as a volume has + * to become HUGE in order to need more than 32-bits worth of mft records. + * Assuming the standard mft record size of 1kiB only the records (never mind + * the non-resident attributes, etc.) would require 4TiB of space on their own + * for the first 32 bits worth of records. This is only if some strange person + * doesn't decide to foul play and make the mft sparse which would be a really + * horrible thing to do as it would trash our current driver implementation. )-: + * Do I hear screams "we want 64-bit inodes!" ?!? (-; + * + * FIXME: The mft zone is defined as the first 12% of the volume. This space is + * reserved so that the mft can grow contiguously and hence doesn't become + * fragmented. Volume free space includes the empty part of the mft zone and + * when the volume's free 88% are used up, the mft zone is shrunk by a factor + * of 2, thus making more space available for more files/data. This process is + * repeated every time there is no more free space except for the mft zone until + * there really is no more free space. + */ + +/* + * Typedef the MFT_REF as a 64-bit value for easier handling. + * Also define two unpacking macros to get to the reference (MREF) and + * sequence number (MSEQNO) respectively. + * The _LE versions are to be applied on little endian MFT_REFs. + * Note: The _LE versions will return a CPU endian formatted value! + */ +typedef enum { + MFT_REF_MASK_CPU = 0x0000ffffffffffffULL, + MFT_REF_MASK_LE = const_cpu_to_le64(0x0000ffffffffffffULL), +} MFT_REF_CONSTS; + +typedef u64 MFT_REF; + +#define MREF(x) ((unsigned long)((x) & MFT_REF_MASK_CPU)) +#define MSEQNO(x) ((u16)(((x) >> 48) & 0xffff)) +#define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & MFT_REF_MASK_CPU)) +#define MSEQNO_LE(x) ((u16)((le64_to_cpu(x) >> 48) & 0xffff)) + +#define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ?
1 : 0) +#define ERR_MREF(x) ((u64)((s64)(x))) +#define MREF_ERR(x) ((int)((s64)(x))) + +/* + * The mft record header present at the beginning of every record in the mft. + * This is followed by a sequence of variable length attribute records which + * is terminated by an attribute of type AT_END which is a truncated attribute + * in that it only consists of the attribute type code AT_END and none of the + * other members of the attribute structure are present. + */ +typedef struct { +/*Ofs*/ +/* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */ + NTFS_RECORD_TYPES magic;/* Usually the magic is "FILE". */ + u16 usa_ofs; /* See NTFS_RECORD definition above. */ + u16 usa_count; /* See NTFS_RECORD definition above. */ + +/* 8*/ u64 lsn; /* $LogFile sequence number for this record. + Changed every time the record is modified. */ +/* 16*/ u16 sequence_number; /* Number of times this mft record has been + reused. (See description for MFT_REF + above.) NOTE: The increment (skipping zero) + is done when the file is deleted. NOTE: If + this is zero it is left zero. */ +/* 18*/ u16 link_count; /* Number of hard links, i.e. the number of + directory entries referencing this record. + NOTE: Only used in mft base records. + NOTE: When deleting a directory entry we + check the link_count and if it is 1 we + delete the file. Otherwise we delete the + FILE_NAME_ATTR being referenced by the + directory entry from the mft record and + decrement the link_count. + FIXME: Careful with Win32 + DOS names! */ +/* 20*/ u16 attrs_offset; /* Byte offset to the first attribute in this + mft record from the start of the mft record. + NOTE: Must be aligned to 8-byte boundary. */ +/* 22*/ MFT_RECORD_FLAGS flags; /* Bit array of MFT_RECORD_FLAGS. When a file + is deleted, the MFT_RECORD_IN_USE flag is + set to zero. */ +/* 24*/ u32 bytes_in_use; /* Number of bytes used in this mft record. + NOTE: Must be aligned to 8-byte boundary. 
*/ +/* 28*/ u32 bytes_allocated; /* Number of bytes allocated for this mft + record. This should be equal to the mft + record size. */ +/* 32*/ MFT_REF base_mft_record; /* This is zero for base mft records. + When it is not zero it is a mft reference + pointing to the base mft record to which + this record belongs (this is then used to + locate the attribute list attribute present + in the base record which describes this + extension record and hence might need + modification when the extension record + itself is modified, also locating the + attribute list also means finding the other + potential extents, belonging to the non-base + mft record). */ +/* 40*/ u16 next_attr_instance; /* The instance number that will be + assigned to the next attribute added to this + mft record. NOTE: Incremented each time + after it is used. NOTE: Every time the mft + record is reused this number is set to zero. + NOTE: The first instance number is always 0. + */ +/* sizeof() = 42 bytes */ +/* NTFS 3.1+ (Windows XP and above) introduce the following additions. */ +/* 42*/ //u16 reserved; /* Reserved/alignment. */ +/* 44*/ //u32 mft_record_number;/* Number of this mft record. */ +/* sizeof() = 48 bytes */ +/* + * When (re)using the mft record, we place the update sequence array at this + * offset, i.e. before we start with the attributes. This also makes sense, + * otherwise we could run into problems with the update sequence array + * containing in itself the last two bytes of a sector which would mean that + * multi sector transfer protection wouldn't work. As you can't protect data + * by overwriting it since you then can't get it back... + * When reading we obviously use the data from the ntfs record header. + */ +} __attribute__ ((__packed__)) MFT_RECORD; + +/* + * System defined attributes (32-bit). 
Each attribute type has a corresponding + * attribute name (Unicode string of maximum 64 character length) as described + * by the attribute definitions present in the data attribute of the $AttrDef + * system file. On NTFS 3.0 volumes the names are just as the types are named + * in the below enum exchanging AT_ for the dollar sign ($). If that isn't a + * revealing choice of symbol... (-; + */ +typedef enum { + AT_UNUSED = const_cpu_to_le32( 0), + AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10), + AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20), + AT_FILE_NAME = const_cpu_to_le32( 0x30), + AT_OBJECT_ID = const_cpu_to_le32( 0x40), + AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50), + AT_VOLUME_NAME = const_cpu_to_le32( 0x60), + AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70), + AT_DATA = const_cpu_to_le32( 0x80), + AT_INDEX_ROOT = const_cpu_to_le32( 0x90), + AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0), + AT_BITMAP = const_cpu_to_le32( 0xb0), + AT_REPARSE_POINT = const_cpu_to_le32( 0xc0), + AT_EA_INFORMATION = const_cpu_to_le32( 0xd0), + AT_EA = const_cpu_to_le32( 0xe0), + AT_PROPERTY_SET = const_cpu_to_le32( 0xf0), + AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100), + AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000), + AT_END = const_cpu_to_le32(0xffffffff), +} ATTR_TYPES; + +/* + * The collation rules for sorting views/indexes/etc (32-bit). + * + * COLLATION_UNICODE_STRING - Collate Unicode strings by comparing their binary + * Unicode values, except that when a character can be uppercased, the + * upper case value collates before the lower case one. + * COLLATION_FILE_NAME - Collate file names as Unicode strings. The collation + * is done very much like COLLATION_UNICODE_STRING. In fact I have no idea + * what the difference is. 
Perhaps the difference is that file names + * would treat some special characters in an odd way (see + * unistr.c::ntfs_collate_names() and unistr.c::legal_ansi_char_array[] + * for what I mean) but COLLATION_UNICODE_STRING would not give any special + * treatment to any characters at all, but this is speculation. + * COLLATION_NTOFS_ULONG - Sorting is done according to ascending u32 key + * values. E.g. used for $SII index in FILE_Secure, which sorts by + * security_id (u32). + * COLLATION_NTOFS_SID - Sorting is done according to ascending SID values. + * E.g. used for $O index in FILE_Extend/$Quota. + * COLLATION_NTOFS_SECURITY_HASH - Sorting is done first by ascending hash + * values and second by ascending security_id values. E.g. used for $SDH + * index in FILE_Secure. + * COLLATION_NTOFS_ULONGS - Sorting is done according to a sequence of ascending + * u32 key values. E.g. used for $O index in FILE_Extend/$ObjId, which + * sorts by object_id (16-byte), by splitting up the object_id in four + * u32 values and using them as individual keys. E.g. take the following + * two object_ids, stored as follows on disk: + * 1st: a1 61 65 b7 65 7b d4 11 9e 3d 00 e0 81 10 42 59 + * 2nd: 38 14 37 d2 d2 f3 d4 11 a5 21 c8 6b 79 b1 97 45 + * To compare them, they are split into four u32 values each, like so: + * 1st: 0xb76561a1 0x11d47b65 0xe0003d9e 0x59421081 + * 2nd: 0xd2371438 0x11d4f3d2 0x6bc821a5 0x4597b179 + * Now, it is apparent why the 2nd object_id collates after the 1st: the + * first u32 value of the 1st object_id is less than the first u32 of + * the 2nd object_id. If the first u32 values of both object_ids were + * equal then the second u32 values would be compared, etc. + */ +typedef enum { + COLLATION_BINARY = const_cpu_to_le32(0), /* Collate by binary + compare where the first byte is most + significant. */ + COLLATION_FILE_NAME = const_cpu_to_le32(1), /* Collate file names + as Unicode strings.
*/ + COLLATION_UNICODE_STRING = const_cpu_to_le32(2), /* Collate Unicode + strings by comparing their binary + Unicode values, except that when a + character can be uppercased, the upper + case value collates before the lower + case one. */ + COLLATION_NTOFS_ULONG = const_cpu_to_le32(16), + COLLATION_NTOFS_SID = const_cpu_to_le32(17), + COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(18), + COLLATION_NTOFS_ULONGS = const_cpu_to_le32(19), +} COLLATION_RULES; + +/* + * The flags (32-bit) describing attribute properties in the attribute + * definition structure. FIXME: This information is from Regis's information + * and, according to him, it is not certain and probably incomplete. + * The INDEXABLE flag is fairly certainly correct as only the file name + * attribute has this flag set and this is the only attribute indexed in NT4. + */ +typedef enum { + INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be + indexed. */ + NEED_TO_REGENERATE = const_cpu_to_le32(0x40), /* Need to regenerate + during regeneration + phase. */ + CAN_BE_NON_RESIDENT = const_cpu_to_le32(0x80), /* Attribute can be + non-resident. */ +} ATTR_DEF_FLAGS; + +/* + * The data attribute of FILE_AttrDef contains a sequence of attribute + * definitions for the NTFS volume. With this, it is supposed to be safe for an + * older NTFS driver to mount a volume containing a newer NTFS version without + * damaging it (that's the theory. In practice it's: not damaging it too much). + * Entries are sorted by attribute type. The flags describe whether the + * attribute can be resident/non-resident and possibly other things, but the + * actual bits are unknown. + */ +typedef struct { +/*hex ofs*/ +/* 0*/ uchar_t name[0x40]; /* Unicode name of the attribute. Zero + terminated. */ +/* 80*/ ATTR_TYPES type; /* Type of the attribute. */ +/* 84*/ u32 display_rule; /* Default display rule. + FIXME: What does it mean? (AIA) */ +/* 88*/ COLLATION_RULES collation_rule; /* Default collation rule. 
*/ +/* 8c*/ ATTR_DEF_FLAGS flags; /* Flags describing the attribute. */ +/* 90*/ u64 min_size; /* Optional minimum attribute size. */ +/* 98*/ u64 max_size; /* Maximum size of attribute. */ +/* sizeof() = 0xa0 or 160 bytes */ +} __attribute__ ((__packed__)) ATTR_DEF; + +/* + * Attribute flags (16-bit). + */ +typedef enum { + ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001), + ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression + method mask. Also, first + illegal value. */ + ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000), + ATTR_IS_SPARSE = const_cpu_to_le16(0x8000), +} __attribute__ ((__packed__)) ATTR_FLAGS; + +/* + * Attribute compression. + * + * Only the data attribute is ever compressed in the current ntfs driver in + * Windows. Further, compression is only applied when the data attribute is + * non-resident. Finally, to use compression, the maximum allowed cluster size + * on a volume is 4kib. + * + * The compression method is based on independently compressing blocks of X + * clusters, where X is determined from the compression_unit value found in the + * non-resident attribute record header (more precisely: X = 2^compression_unit + * clusters). On Windows NT/2k, X always is 16 clusters (compression_unit = 4). + * + * There are three different cases of how a compression block of X clusters + * can be stored: + * + * 1) The data in the block is all zero (a sparse block): + * This is stored as a sparse block in the run list, i.e. the run list + * entry has length = X and lcn = -1. The mapping pairs array actually + * uses a delta_lcn value length of 0, i.e. delta_lcn is not present at + * all, which is then interpreted by the driver as lcn = -1. + * NOTE: Even uncompressed files can be sparse on NTFS 3.0 volumes, then + * the same principles apply as above, except that the length is not + * restricted to being any particular value. 
+ * + * 2) The data in the block is not compressed: + * This happens when compression doesn't reduce the size of the block + * in clusters. I.e. if compression has a small effect so that the + * compressed data still occupies X clusters, then the uncompressed data + * is stored in the block. + * This case is recognised by the fact that the run list entry has + * length = X and lcn >= 0. The mapping pairs array stores this as + * normal with a run length of X and some specific delta_lcn, i.e. + * delta_lcn has to be present. + * + * 3) The data in the block is compressed: + * The common case. This case is recognised by the fact that the run + * list entry has length L < X and lcn >= 0. The mapping pairs array + * stores this as normal with a run length of L and some specific + * delta_lcn, i.e. delta_lcn has to be present. This run list entry is + * immediately followed by a sparse entry with length = X - L and + * lcn = -1. The latter entry is to make up the vcn counting to the + * full compression block size X. + * + * In fact, life is more complicated because adjacent entries of the same type + * can be coalesced. This means that one has to keep track of the number of + * clusters handled and work on a basis of X clusters at a time being one + * block. An example: if length L > X this means that this particular run list + * entry contains a block of length X and part of one or more blocks of length + * L - X. Another example: if length L < X, this does not necessarily mean that + * the block is compressed as it might be that the lcn changes inside the block + * and hence the following run list entry describes the continuation of the + * potentially compressed block. The block would be compressed if the + * following run list entry describes at least X - L sparse clusters, thus + * making up the compression block length as described in point 3 above.
(Of + * course, there can be several run list entries with small lengths so that the + * sparse entry does not follow the first data containing entry with + * length < X.) + * + * NOTE: At the end of the compressed attribute value, there most likely is not + * just the right amount of data to make up a compression block, thus this data + * is not even attempted to be compressed. It is just stored as is, unless + * the number of clusters it occupies is reduced when compressed in which case + * it is stored as a compressed compression block, complete with sparse + * clusters at the end. + */ + +/* + * Flags of resident attributes (8-bit). + */ +typedef enum { + RESIDENT_ATTR_IS_INDEXED = 0x01, /* Attribute is referenced in an index + (has implications for deleting and + modifying the attribute). */ +} __attribute__ ((__packed__)) RESIDENT_ATTR_FLAGS; + +/* + * Attribute record header. Always aligned to 8-byte boundary. + */ +typedef struct { +/*Ofs*/ +/* 0*/ ATTR_TYPES type; /* The (32-bit) type of the attribute. */ +/* 4*/ u32 length; /* Byte size of the resident part of the + attribute (aligned to 8-byte boundary). + Used to get to the next attribute. */ +/* 8*/ u8 non_resident; /* If 0, attribute is resident. + If 1, attribute is non-resident. */ +/* 9*/ u8 name_length; /* Unicode character size of name of attribute. + 0 if unnamed. */ +/* 10*/ u16 name_offset; /* If name_length != 0, the byte offset to the + beginning of the name from the attribute + record. Note that the name is stored as a + Unicode string. When creating, place offset + just at the end of the record header. Then, + follow with attribute value or mapping pairs + array, resident and non-resident attributes + respectively, aligning to an 8-byte + boundary. */ +/* 12*/ ATTR_FLAGS flags; /* Flags describing the attribute. */ +/* 14*/ u16 instance; /* The instance of this attribute record. 
This + number is unique within this mft record (see + MFT_RECORD/next_attr_instance notes + in mft.h for more details). */ +/* 16*/ union { + /* Resident attributes. */ + struct { +/* 16 */ u32 value_length; /* Byte size of attribute value. */ +/* 20 */ u16 value_offset; /* Byte offset of the attribute + value from the start of the + attribute record. When creating, + align to 8-byte boundary if we + have a name present as this might + not have a length of a multiple + of 8-bytes. */ +/* 22 */ RESIDENT_ATTR_FLAGS flags; /* See above. */ +/* 23 */ s8 reserved; /* Reserved/alignment to 8-byte + boundary. */ + } __attribute__ ((__packed__)) resident; + /* Non-resident attributes. */ + struct { +/* 16*/ VCN lowest_vcn; /* Lowest valid virtual cluster number + for this portion of the attribute value or + 0 if this is the only extent (usually the + case). - Only when an attribute list is used + does lowest_vcn != 0 ever occur. */ +/* 24*/ VCN highest_vcn; /* Highest valid vcn of this extent of + the attribute value. - Usually there is only one + portion, so this usually equals the attribute + value size in clusters minus 1. Can be -1 for + zero length files. Can be 0 for "single extent" + attributes. */ +/* 32*/ u16 mapping_pairs_offset; /* Byte offset from the + beginning of the structure to the mapping pairs + array which contains the mappings between the + vcns and the logical cluster numbers (lcns). + When creating, place this at the end of this + record header aligned to 8-byte boundary. */ +/* 34*/ u8 compression_unit; /* The compression unit expressed + as the log to the base 2 of the number of + clusters in a compression unit. 0 means not + compressed. (This effectively limits the + compression unit size to be a power of two + clusters.) WinNT4 only uses a value of 4. */ +/* 35*/ u8 reserved[5]; /* Align to 8-byte boundary.
*/ +/* The sizes below are only used when lowest_vcn is zero, as otherwise it would + be difficult to keep them up-to-date.*/ +/* 40*/ s64 allocated_size; /* Byte size of disk space + allocated to hold the attribute value. Always + is a multiple of the cluster size. When a file + is compressed, this field is a multiple of the + compression block size (2^compression_unit) and + it represents the logically allocated space + rather than the actual on disk usage. For this + use the compressed_size (see below). */ +/* 48*/ s64 data_size; /* Byte size of the attribute + value. Can be larger than allocated_size if + attribute value is compressed or sparse. */ +/* 56*/ s64 initialized_size; /* Byte size of initialized + portion of the attribute value. Usually equals + data_size. */ +/* sizeof(uncompressed attr) = 64*/ +/* 64*/ s64 compressed_size; /* Byte size of the attribute + value after compression. Only present when + compressed. Always is a multiple of the + cluster size. Represents the actual amount of + disk space being used on the disk. */ +/* sizeof(compressed attr) = 72*/ + } __attribute__ ((__packed__)) non_resident; + } __attribute__ ((__packed__)) data; +} __attribute__ ((__packed__)) ATTR_RECORD; + +typedef ATTR_RECORD ATTR_REC; + +/* + * File attribute flags (32-bit). + */ +typedef enum { + /* + * These flags are only present in the STANDARD_INFORMATION attribute + * (in the field file_attributes). + */ + FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), + FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), + FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), + /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */ + + FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010), + /* FILE_ATTR_DIRECTORY is not considered valid in NT. It is reserved + for the DOS SUBDIRECTORY flag. 
*/ + FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020), + FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040), + FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080), + + FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100), + FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200), + FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400), + FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800), + + FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000), + FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000), + FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000), + + FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7), + /* FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the + FILE_ATTR_DEVICE and preserves everything else. This mask + is used to obtain all flags that are valid for reading. */ + FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7), + /* FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the + F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, + F_A_COMPRESSED and F_A_ENCRYPTED and preserves the rest. This mask + is used to obtain all flags that are valid for setting. */ + + /* + * These flags are only present in the FILE_NAME attribute (in the + * field file_attributes). + */ + FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), + /* This is a copy of the corresponding bit from the mft record, telling + us whether this is a directory or not, i.e. whether it has an + index root attribute or not. */ + FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), + /* This is a copy of the corresponding bit from the mft record, telling + us whether this file has a view index present (eg. object id index, + quota index, one of the security indexes or the encrypting file + system related indexes). */ +} FILE_ATTR_FLAGS; + +/* + * NOTE on times in NTFS: All times are in MS standard time format, i.e.
they + * are the number of 100-nanosecond intervals since 1st January 1601, 00:00:00 + * universal coordinated time (UTC). (In Linux time starts 1st January 1970, + * 00:00:00 UTC and is stored as the number of 1-second intervals since then.) + */ + +/* + * Attribute: Standard information (0x10). + * + * NOTE: Always resident. + * NOTE: Present in all base file records on a volume. + * NOTE: There is conflicting information about the meaning of each of the time + * fields but the meaning as defined below has been verified to be + * correct by practical experimentation on Windows NT4 SP6a and is hence + * assumed to be the one and only correct interpretation. + */ +typedef struct { +/*Ofs*/ +/* 0*/ s64 creation_time; /* Time file was created. Updated when + a filename is changed(?). */ +/* 8*/ s64 last_data_change_time; /* Time the data attribute was last + modified. */ +/* 16*/ s64 last_mft_change_time; /* Time this mft record was last + modified. */ +/* 24*/ s64 last_access_time; /* Approximate time when the file was + last accessed (obviously this is not + updated on read-only volumes). In + Windows this is only updated when + accessed if some time delta has + passed since the last update. Also, + last access times updates can be + disabled altogether for speed. */ +/* 32*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ +/* 36*/ union { + /* NTFS 1.2 */ + struct { + /* 36*/ u8 reserved12[12]; /* Reserved/alignment to 8-byte + boundary. */ + } __attribute__ ((__packed__)) v1; + /* sizeof() = 48 bytes */ + /* NTFS 3.x */ + struct { +/* + * If a volume has been upgraded from a previous NTFS version, then these + * fields are present only if the file has been accessed since the upgrade. + * Recognize the difference by comparing the length of the resident attribute + * value. If it is 48, then the following fields are missing. If it is 72 then + * the fields are present. 
Maybe just check like this: + * if (resident.ValueLength < sizeof(STANDARD_INFORMATION)) { + * Assume NTFS 1.2- format. + * If (volume version is 3.x) + * Upgrade attribute to NTFS 3.x format. + * else + * Use NTFS 1.2- format for access. + * } else + * Use NTFS 3.x format for access. + * Only problem is that it might be legal to set the length of the value to + * arbitrarily large values thus spoiling this check. - But chkdsk probably + * views that as a corruption, assuming that it behaves like this for all + * attributes. + */ + /* 36*/ u32 maximum_versions; /* Maximum allowed versions for + file. Zero if version numbering is disabled. */ + /* 40*/ u32 version_number; /* This file's version (if any). + Set to zero if maximum_versions is zero. */ + /* 44*/ u32 class_id; /* Class id from bidirectional + class id index (?). */ + /* 48*/ u32 owner_id; /* Owner_id of the user owning + the file. Translate via $Q index in FILE_Extend + /$Quota to the quota control entry for the user + owning the file. Zero if quotas are disabled. */ + /* 52*/ u32 security_id; /* Security_id for the file. + Translate via $SII index and $SDS data stream + in FILE_Secure to the security descriptor. */ + /* 56*/ u64 quota_charged; /* Byte size of the charge to + the quota for all streams of the file. Note: Is + zero if quotas are disabled. */ + /* 64*/ u64 usn; /* Last update sequence number + of the file. This is a direct index into the + change (aka usn) journal file. It is zero if + the usn journal is disabled. + NOTE: To disable the journal need to delete + the journal file itself and to then walk the + whole mft and set all Usn entries in all mft + records to zero! (This can take a while!) + The journal is FILE_Extend/$UsnJrnl. Win2k + will recreate the journal and initiate + logging if necessary when mounting the + partition. This, in contrast to disabling the + journal is a very fast process, so the user + won't even notice it. 
*/ + } __attribute__ ((__packed__)) v3; + /* sizeof() = 72 bytes (NTFS 3.x) */ + } __attribute__ ((__packed__)) ver; +} __attribute__ ((__packed__)) STANDARD_INFORMATION; + +/* + * Attribute: Attribute list (0x20). + * + * - Can be either resident or non-resident. + * - Value consists of a sequence of variable length, 8-byte aligned, + * ATTR_LIST_ENTRY records. + * - The list is not terminated by anything at all! The only way to know when + * the end is reached is to keep track of the current offset and compare it to + * the attribute value size. + * - The attribute list attribute contains one entry for each attribute of + * the file in which the list is located, except for the list attribute + * itself. The list is sorted: first by attribute type, second by attribute + * name (if present), third by instance number. The extents of one + * non-resident attribute (if present) immediately follow after the initial + * extent. They are ordered by lowest_vcn and have their instance set to zero. + * It is not allowed to have two attributes with all sorting keys equal. + * - Further restrictions: + * - If not resident, the vcn to lcn mapping array has to fit inside the + * base mft record. + * - The attribute list attribute value has a maximum size of 256kb. This + * is imposed by the Windows cache manager. + * - Attribute lists are only used when the attributes of mft record do not + * fit inside the mft record despite all attributes (that can be made + * non-resident) having been made non-resident. This can happen e.g. when: + * - File has a large number of hard links (lots of file name + * attributes present). + * - The mapping pairs array of some non-resident attribute becomes so + * large due to fragmentation that it overflows the mft record. + * - The security descriptor is very complex (not applicable to + * NTFS 3.0 volumes). + * - There are many named streams. + */ +typedef struct { +/*Ofs*/ +/* 0*/ ATTR_TYPES type; /* Type of referenced attribute. 
*/ +/* 4*/ u16 length; /* Byte size of this entry (8-byte aligned). */ +/* 6*/ u8 name_length; /* Size in Unicode chars of the name of the + attribute or 0 if unnamed. */ +/* 7*/ u8 name_offset; /* Byte offset to beginning of attribute name + (always set this to where the name would + start even if unnamed). */ +/* 8*/ VCN lowest_vcn; /* Lowest virtual cluster number of this portion + of the attribute value. This is usually 0. It + is non-zero for the case where one attribute + does not fit into one mft record and thus + several mft records are allocated to hold + this attribute. In the latter case, each mft + record holds one extent of the attribute and + there is one attribute list entry for each + extent. NOTE: This is DEFINITELY a signed + value! The windows driver uses cmp, followed + by jg when comparing this, thus it treats it + as signed. */ +/* 16*/ MFT_REF mft_reference; /* The reference of the mft record holding + the ATTR_RECORD for this portion of the + attribute value. */ +/* 24*/ u16 instance; /* If lowest_vcn = 0, the instance of the + attribute being referenced; otherwise 0. */ +/* 26*/ uchar_t name[0]; /* Use when creating only. When reading use + name_offset to determine the location of the + name. */ +/* sizeof() = 26 + (attribute_name_length * 2) bytes */ +} __attribute__ ((__packed__)) ATTR_LIST_ENTRY; + +/* + * The maximum allowed length for a file name. + */ +#define MAXIMUM_FILE_NAME_LENGTH 255 + +/* + * Possible namespaces for filenames in ntfs (8-bit). + */ +typedef enum { + FILE_NAME_POSIX = 0x00, + /* This is the largest namespace. It is case sensitive and + allows all Unicode characters except for: '\0' and '/'. + Beware that in WinNT/2k files which eg have the same name + except for their case will not be distinguished by the + standard utilities and thus a "del filename" will delete + both "filename" and "fileName" without warning. */ + FILE_NAME_WIN32 = 0x01, + /* The standard WinNT/2k NTFS long filenames. Case insensitive. 
+ All Unicode chars except: '\0', '"', '*', '/', ':', '<', + '>', '?', '\' and '|'. Further, names cannot end with a '.' + or a space. */ + FILE_NAME_DOS = 0x02, + /* The standard DOS filenames (8.3 format). Uppercase only. + All 8-bit characters greater than space, except: '"', '*', '+', + ',', '/', ':', ';', '<', '=', '>', '?' and '\'. */ + FILE_NAME_WIN32_AND_DOS = 0x03, + /* 3 means that both the Win32 and the DOS filenames are + identical and hence have been saved in this single filename + record. */ +} __attribute__ ((__packed__)) FILE_NAME_TYPE_FLAGS; + +/* + * Attribute: Filename (0x30). + * + * NOTE: Always resident. + * NOTE: All fields, except the parent_directory, are only updated when the + * filename is changed. Until then, they just become out of sync with + * reality and the more up to date values are present in the standard + * information attribute. + * NOTE: There is conflicting information about the meaning of each of the time + * fields but the meaning as defined below has been verified to be + * correct by practical experimentation on Windows NT4 SP6a and is hence + * assumed to be the one and only correct interpretation. + */ +typedef struct { +/*hex ofs*/ +/* 0*/ MFT_REF parent_directory; /* Directory this filename is + referenced from. */ +/* 8*/ s64 creation_time; /* Time file was created. */ +/* 10*/ s64 last_data_change_time; /* Time the data attribute was last + modified. */ +/* 18*/ s64 last_mft_change_time; /* Time this mft record was last + modified. */ +/* 20*/ s64 last_access_time; /* Last time this mft record was + accessed. */ +/* 28*/ s64 allocated_size; /* Byte size of allocated space for the + data attribute. NOTE: Is a multiple + of the cluster size. */ +/* 30*/ s64 data_size; /* Byte size of actual data in data + attribute. */ +/* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. 
*/ +/* 3c*/ union { + /* 3c*/ struct { + /* 3c*/ u16 packed_ea_size; /* Size of the buffer needed to + pack the extended attributes + (EAs), if such are present.*/ + /* 3e*/ u16 reserved; /* Reserved for alignment. */ + } __attribute__ ((__packed__)) ea; + /* 3c*/ struct { + /* 3c*/ u32 reparse_point_tag; /* Type of reparse point, + present only in reparse + points and only if there are + no EAs. */ + } __attribute__ ((__packed__)) rp; + } __attribute__ ((__packed__)) type; +/* 40*/ u8 file_name_length; /* Length of file name in + (Unicode) characters. */ +/* 41*/ FILE_NAME_TYPE_FLAGS file_name_type; /* Namespace of the file name.*/ +/* 42*/ uchar_t file_name[0]; /* File name in Unicode. */ +} __attribute__ ((__packed__)) FILE_NAME_ATTR; + +/* + * GUID structures store globally unique identifiers (GUID). A GUID is a + * 128-bit value consisting of one group of eight hexadecimal digits, followed + * by three groups of four hexadecimal digits each, followed by one group of + * twelve hexadecimal digits. GUIDs are Microsoft's implementation of the + * distributed computing environment (DCE) universally unique identifier (UUID). + * Example of a GUID: + * 1F010768-5A73-BC91-0010A52216A7 + */ +typedef struct { + u32 data1; /* The first eight hexadecimal digits of the GUID. */ + u16 data2; /* The first group of four hexadecimal digits. */ + u16 data3; /* The second group of four hexadecimal digits. */ + u8 data4[8]; /* The first two bytes are the third group of four + hexadecimal digits. The remaining six bytes are the + final 12 hexadecimal digits. */ +} __attribute__ ((__packed__)) GUID; + +/* + * FILE_Extend/$ObjId contains an index named $O. This index contains all + * object_ids present on the volume as the index keys and the corresponding + * mft_record numbers as the index entry data parts. The data part (defined + * below) also contains three other object_ids: + * birth_volume_id - object_id of FILE_Volume on which the file was first + * created. Optional (i.e. 
can be zero). + * birth_object_id - object_id of file when it was first created. Usually + * equals the object_id. Optional (i.e. can be zero). + * domain_id - Reserved (always zero). + */ +typedef struct { + MFT_REF mft_reference; /* Mft record containing the object_id in + the index entry key. */ + union { + struct { + GUID birth_volume_id; + GUID birth_object_id; + GUID domain_id; + } __attribute__ ((__packed__)) origin; + u8 extended_info[48]; + } __attribute__ ((__packed__)) opt; +} __attribute__ ((__packed__)) OBJ_ID_INDEX_DATA; + +/* + * Attribute: Object id (NTFS 3.0+) (0x40). + * + * NOTE: Always resident. + */ +typedef struct { + GUID object_id; /* Unique id assigned to the + file.*/ + /* The following fields are optional. The attribute value size is 16 + bytes, i.e. sizeof(GUID), if these are not present at all. Note, + the entries can be present but one or more (or all) can be zero + meaning that that particular value(s) is(are) not defined. */ + union { + struct { + GUID birth_volume_id; /* Unique id of volume on which + the file was first created.*/ + GUID birth_object_id; /* Unique id of file when it was + first created. */ + GUID domain_id; /* Reserved, zero. */ + } __attribute__ ((__packed__)) origin; + u8 extended_info[48]; + } __attribute__ ((__packed__)) opt; +} __attribute__ ((__packed__)) OBJECT_ID_ATTR; + +/* + * The pre-defined IDENTIFIER_AUTHORITIES used as SID_IDENTIFIER_AUTHORITY in + * the SID structure (see below). + */ +//typedef enum { /* SID string prefix. 
*/ +// SECURITY_NULL_SID_AUTHORITY = {0, 0, 0, 0, 0, 0}, /* S-1-0 */ +// SECURITY_WORLD_SID_AUTHORITY = {0, 0, 0, 0, 0, 1}, /* S-1-1 */ +// SECURITY_LOCAL_SID_AUTHORITY = {0, 0, 0, 0, 0, 2}, /* S-1-2 */ +// SECURITY_CREATOR_SID_AUTHORITY = {0, 0, 0, 0, 0, 3}, /* S-1-3 */ +// SECURITY_NON_UNIQUE_AUTHORITY = {0, 0, 0, 0, 0, 4}, /* S-1-4 */ +// SECURITY_NT_SID_AUTHORITY = {0, 0, 0, 0, 0, 5}, /* S-1-5 */ +//} IDENTIFIER_AUTHORITIES; + +/* + * These relative identifiers (RIDs) are used with the above identifier + * authorities to make up universal well-known SIDs. + * + * Note: The relative identifier (RID) refers to the portion of a SID, which + * identifies a user or group in relation to the authority that issued the SID. + * For example, the universal well-known SID Creator Owner ID (S-1-3-0) is + * made up of the identifier authority SECURITY_CREATOR_SID_AUTHORITY (3) and + * the relative identifier SECURITY_CREATOR_OWNER_RID (0). + */ +typedef enum { /* Identifier authority. */ + SECURITY_NULL_RID = 0, /* S-1-0 */ + SECURITY_WORLD_RID = 0, /* S-1-1 */ + SECURITY_LOCAL_RID = 0, /* S-1-2 */ + + SECURITY_CREATOR_OWNER_RID = 0, /* S-1-3 */ + SECURITY_CREATOR_GROUP_RID = 1, /* S-1-3 */ + + SECURITY_CREATOR_OWNER_SERVER_RID = 2, /* S-1-3 */ + SECURITY_CREATOR_GROUP_SERVER_RID = 3, /* S-1-3 */ + + SECURITY_DIALUP_RID = 1, + SECURITY_NETWORK_RID = 2, + SECURITY_BATCH_RID = 3, + SECURITY_INTERACTIVE_RID = 4, + SECURITY_SERVICE_RID = 6, + SECURITY_ANONYMOUS_LOGON_RID = 7, + SECURITY_PROXY_RID = 8, + SECURITY_ENTERPRISE_CONTROLLERS_RID=9, + SECURITY_SERVER_LOGON_RID = 9, + SECURITY_PRINCIPAL_SELF_RID = 0xa, + SECURITY_AUTHENTICATED_USER_RID = 0xb, + SECURITY_RESTRICTED_CODE_RID = 0xc, + SECURITY_TERMINAL_SERVER_RID = 0xd, + + SECURITY_LOGON_IDS_RID = 5, + SECURITY_LOGON_IDS_RID_COUNT = 3, + + SECURITY_LOCAL_SYSTEM_RID = 0x12, + + SECURITY_NT_NON_UNIQUE = 0x15, + + SECURITY_BUILTIN_DOMAIN_RID = 0x20, + + /* + * Well-known domain relative sub-authority values (RIDs). 
+ */ + + /* Users. */ + DOMAIN_USER_RID_ADMIN = 0x1f4, + DOMAIN_USER_RID_GUEST = 0x1f5, + DOMAIN_USER_RID_KRBTGT = 0x1f6, + + /* Groups. */ + DOMAIN_GROUP_RID_ADMINS = 0x200, + DOMAIN_GROUP_RID_USERS = 0x201, + DOMAIN_GROUP_RID_GUESTS = 0x202, + DOMAIN_GROUP_RID_COMPUTERS = 0x203, + DOMAIN_GROUP_RID_CONTROLLERS = 0x204, + DOMAIN_GROUP_RID_CERT_ADMINS = 0x205, + DOMAIN_GROUP_RID_SCHEMA_ADMINS = 0x206, + DOMAIN_GROUP_RID_ENTERPRISE_ADMINS= 0x207, + DOMAIN_GROUP_RID_POLICY_ADMINS = 0x208, + + /* Aliases. */ + DOMAIN_ALIAS_RID_ADMINS = 0x220, + DOMAIN_ALIAS_RID_USERS = 0x221, + DOMAIN_ALIAS_RID_GUESTS = 0x222, + DOMAIN_ALIAS_RID_POWER_USERS = 0x223, + + DOMAIN_ALIAS_RID_ACCOUNT_OPS = 0x224, + DOMAIN_ALIAS_RID_SYSTEM_OPS = 0x225, + DOMAIN_ALIAS_RID_PRINT_OPS = 0x226, + DOMAIN_ALIAS_RID_BACKUP_OPS = 0x227, + + DOMAIN_ALIAS_RID_REPLICATOR = 0x228, + DOMAIN_ALIAS_RID_RAS_SERVERS = 0x229, + DOMAIN_ALIAS_RID_PREW2KCOMPACCESS = 0x22a, +} RELATIVE_IDENTIFIERS; + +/* + * The universal well-known SIDs: + * + * NULL_SID S-1-0-0 + * WORLD_SID S-1-1-0 + * LOCAL_SID S-1-2-0 + * CREATOR_OWNER_SID S-1-3-0 + * CREATOR_GROUP_SID S-1-3-1 + * CREATOR_OWNER_SERVER_SID S-1-3-2 + * CREATOR_GROUP_SERVER_SID S-1-3-3 + * + * (Non-unique IDs) S-1-4 + * + * NT well-known SIDs: + * + * NT_AUTHORITY_SID S-1-5 + * DIALUP_SID S-1-5-1 + * + * NETWORK_SID S-1-5-2 + * BATCH_SID S-1-5-3 + * INTERACTIVE_SID S-1-5-4 + * SERVICE_SID S-1-5-6 + * ANONYMOUS_LOGON_SID S-1-5-7 (aka null logon session) + * PROXY_SID S-1-5-8 + * SERVER_LOGON_SID S-1-5-9 (aka domain controller account) + * SELF_SID S-1-5-10 (self RID) + * AUTHENTICATED_USER_SID S-1-5-11 + * RESTRICTED_CODE_SID S-1-5-12 (running restricted code) + * TERMINAL_SERVER_SID S-1-5-13 (running on terminal server) + * + * (Logon IDs) S-1-5-5-X-Y + * + * (NT non-unique IDs) S-1-5-0x15-... + * + * (Built-in domain) S-1-5-0x20 + */ + +/* + * The SID_IDENTIFIER_AUTHORITY is a 48-bit value used in the SID structure. 
+ */ +typedef union { + struct { + u32 low; /* Low 32-bits. */ + u16 high; /* High 16-bits. */ + } __attribute__ ((__packed__)) parts; + u8 value[6]; /* Value as individual bytes. */ +} __attribute__ ((__packed__)) SID_IDENTIFIER_AUTHORITY; + +/* + * The SID structure is a variable-length structure used to uniquely identify + * users or groups. SID stands for security identifier. + * + * The standard textual representation of the SID is of the form: + * S-R-I-S-S... + * Where: + * - The first "S" is the literal character 'S' identifying the following + * digits as a SID. + * - R is the revision level of the SID expressed as a sequence of digits + * either in decimal or hexadecimal (if the latter, prefixed by "0x"). + * - I is the 48-bit identifier_authority, expressed as digits as R above. + * - S... is one or more sub_authority values, expressed as digits as above. + * + * Example SID; the domain-relative SID of the local Administrators group on + * Windows NT/2k: + * S-1-5-32-544 + * This translates to a SID with: + * revision = 1, + * sub_authority_count = 2, + * identifier_authority = {0,0,0,0,0,5}, // SECURITY_NT_AUTHORITY + * sub_authority[0] = 32, // SECURITY_BUILTIN_DOMAIN_RID + * sub_authority[1] = 544 // DOMAIN_ALIAS_RID_ADMINS + */ +typedef struct { + u8 revision; + u8 sub_authority_count; + SID_IDENTIFIER_AUTHORITY identifier_authority; + u32 sub_authority[1]; /* At least one sub_authority. */ +} __attribute__ ((__packed__)) SID; + +/* + * Current constants for SIDs. + */ +typedef enum { + SID_REVISION = 1, /* Current revision level. */ + SID_MAX_SUB_AUTHORITIES = 15, /* Maximum number of those. */ + SID_RECOMMENDED_SUB_AUTHORITIES = 1, /* Will change to around 6 in + a future revision. */ +} SID_CONSTANTS; + +/* + * The predefined ACE types (8-bit, see below). 
+ */ +typedef enum { + ACCESS_MIN_MS_ACE_TYPE = 0, + ACCESS_ALLOWED_ACE_TYPE = 0, + ACCESS_DENIED_ACE_TYPE = 1, + SYSTEM_AUDIT_ACE_TYPE = 2, + SYSTEM_ALARM_ACE_TYPE = 3, /* Not implemented as of Win2k. */ + ACCESS_MAX_MS_V2_ACE_TYPE = 3, + + ACCESS_ALLOWED_COMPOUND_ACE_TYPE= 4, + ACCESS_MAX_MS_V3_ACE_TYPE = 4, + + /* The following are Win2k only. */ + ACCESS_MIN_MS_OBJECT_ACE_TYPE = 5, + ACCESS_ALLOWED_OBJECT_ACE_TYPE = 5, + ACCESS_DENIED_OBJECT_ACE_TYPE = 6, + SYSTEM_AUDIT_OBJECT_ACE_TYPE = 7, + SYSTEM_ALARM_OBJECT_ACE_TYPE = 8, + ACCESS_MAX_MS_OBJECT_ACE_TYPE = 8, + + ACCESS_MAX_MS_V4_ACE_TYPE = 8, + + /* This one is for WinNT&2k. */ + ACCESS_MAX_MS_ACE_TYPE = 8, +} __attribute__ ((__packed__)) ACE_TYPES; + +/* + * The ACE flags (8-bit) for audit and inheritance (see below). + * + * SUCCESSFUL_ACCESS_ACE_FLAG is only used with system audit and alarm ACE + * types to indicate that a message is generated (in Windows!) for successful + * accesses. + * + * FAILED_ACCESS_ACE_FLAG is only used with system audit and alarm ACE types + * to indicate that a message is generated (in Windows!) for failed accesses. + */ +typedef enum { + /* The inheritance flags. */ + OBJECT_INHERIT_ACE = 0x01, + CONTAINER_INHERIT_ACE = 0x02, + NO_PROPAGATE_INHERIT_ACE = 0x04, + INHERIT_ONLY_ACE = 0x08, + INHERITED_ACE = 0x10, /* Win2k only. */ + VALID_INHERIT_FLAGS = 0x1f, + + /* The audit flags. */ + SUCCESSFUL_ACCESS_ACE_FLAG = 0x40, + FAILED_ACCESS_ACE_FLAG = 0x80, +} __attribute__ ((__packed__)) ACE_FLAGS; + +/* + * An ACE is an access-control entry in an access-control list (ACL). + * An ACE defines access to an object for a specific user or group or defines + * the types of access that generate system-administration messages or alarms + * for a specific user or group. The user or group is identified by a security + * identifier (SID). + * + * Each ACE starts with an ACE_HEADER structure (aligned on 4-byte boundary), + * which specifies the type and size of the ACE. 
The format of the subsequent + * data depends on the ACE type. + */ +typedef struct { +/*Ofs*/ +/* 0*/ ACE_TYPES type; /* Type of the ACE. */ +/* 1*/ ACE_FLAGS flags; /* Flags describing the ACE. */ +/* 2*/ u16 size; /* Size in bytes of the ACE. */ +} __attribute__ ((__packed__)) ACE_HEADER; + +/* + * The access mask (32-bit). Defines the access rights. + */ +typedef enum { + /* + * The specific rights (bits 0 to 15). Depend on the type of the + * object being secured by the ACE. + */ + + /* Specific rights for files and directories are as follows: */ + + /* Right to read data from the file. (FILE) */ + FILE_READ_DATA = const_cpu_to_le32(0x00000001), + /* Right to list contents of a directory. (DIRECTORY) */ + FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001), + + /* Right to write data to the file. (FILE) */ + FILE_WRITE_DATA = const_cpu_to_le32(0x00000002), + /* Right to create a file in the directory. (DIRECTORY) */ + FILE_ADD_FILE = const_cpu_to_le32(0x00000002), + + /* Right to append data to the file. (FILE) */ + FILE_APPEND_DATA = const_cpu_to_le32(0x00000004), + /* Right to create a subdirectory. (DIRECTORY) */ + FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004), + + /* Right to read extended attributes. (FILE/DIRECTORY) */ + FILE_READ_EA = const_cpu_to_le32(0x00000008), + + /* Right to write extended attributes. (FILE/DIRECTORY) */ + FILE_WRITE_EA = const_cpu_to_le32(0x00000010), + + /* Right to execute a file. (FILE) */ + FILE_EXECUTE = const_cpu_to_le32(0x00000020), + /* Right to traverse the directory. (DIRECTORY) */ + FILE_TRAVERSE = const_cpu_to_le32(0x00000020), + + /* + * Right to delete a directory and all the files it contains (its + * children), even if the files are read-only. (DIRECTORY) + */ + FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040), + + /* Right to read file attributes. (FILE/DIRECTORY) */ + FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080), + + /* Right to change file attributes. 
(FILE/DIRECTORY) */ + FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100), + + /* + * The standard rights (bits 16 to 23). Are independent of the type of + * object being secured. + */ + + /* Right to delete the object. */ + DELETE = const_cpu_to_le32(0x00010000), + + /* + * Right to read the information in the object's security descriptor, + * not including the information in the SACL. I.e. right to read the + * security descriptor and owner. + */ + READ_CONTROL = const_cpu_to_le32(0x00020000), + + /* Right to modify the DACL in the object's security descriptor. */ + WRITE_DAC = const_cpu_to_le32(0x00040000), + + /* Right to change the owner in the object's security descriptor. */ + WRITE_OWNER = const_cpu_to_le32(0x00080000), + + /* + * Right to use the object for synchronization. Enables a process to + * wait until the object is in the signalled state. Some object types + * do not support this access right. + */ + SYNCHRONIZE = const_cpu_to_le32(0x00100000), + + /* + * The following STANDARD_RIGHTS_* are combinations of the above for + * convenience and are defined by the Win32 API. + */ + + /* These are currently defined to READ_CONTROL. */ + STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000), + STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000), + STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000), + + /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */ + STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000), + + /* + * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and + * SYNCHRONIZE access. + */ + STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000), + + /* + * The access system ACL and maximum allowed access types (bits 24 to + * 25, bits 26 to 27 are reserved). + */ + ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000), + MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000), + + /* + * The generic rights (bits 28 to 31). These map onto the standard and + * specific rights. 
+ */ + + /* Read, write, and execute access. */ + GENERIC_ALL = const_cpu_to_le32(0x10000000), + + /* Execute access. */ + GENERIC_EXECUTE = const_cpu_to_le32(0x20000000), + + /* + * Write access. For files, this maps onto: + * FILE_APPEND_DATA | FILE_WRITE_ATTRIBUTES | FILE_WRITE_DATA | + * FILE_WRITE_EA | STANDARD_RIGHTS_WRITE | SYNCHRONIZE + * For directories, the mapping has the same numerical value. See + * above for the descriptions of the rights granted. + */ + GENERIC_WRITE = const_cpu_to_le32(0x40000000), + + /* + * Read access. For files, this maps onto: + * FILE_READ_ATTRIBUTES | FILE_READ_DATA | FILE_READ_EA | + * STANDARD_RIGHTS_READ | SYNCHRONIZE + * For directories, the mapping has the same numerical value. See + * above for the descriptions of the rights granted. + */ + GENERIC_READ = const_cpu_to_le32(0x80000000), +} ACCESS_MASK; + +/* + * The generic mapping array. Used to denote the mapping of each generic + * access right to a specific access mask. + * + * FIXME: What exactly is this and what is it for? (AIA) + */ +typedef struct { + ACCESS_MASK generic_read; + ACCESS_MASK generic_write; + ACCESS_MASK generic_execute; + ACCESS_MASK generic_all; +} __attribute__ ((__packed__)) GENERIC_MAPPING; + +/* + * The predefined ACE type structures are as defined below. + */ + +/* + * ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE, SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE + */ +typedef struct { +/* 0 ACE_HEADER; -- Unfolded here as gcc doesn't like unnamed structs. */ + ACE_TYPES type; /* Type of the ACE. */ + ACE_FLAGS flags; /* Flags describing the ACE. */ + u16 size; /* Size in bytes of the ACE. */ +/* 4*/ ACCESS_MASK mask; /* Access mask associated with the ACE. */ + +/* 8*/ SID sid; /* The SID associated with the ACE. */ +} __attribute__ ((__packed__)) ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE, + SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE; + +/* + * The object ACE flags (32-bit). 
+ */ +typedef enum { + ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1), + ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2), +} OBJECT_ACE_FLAGS; + +typedef struct { +/* 0 ACE_HEADER; -- Unfolded here as gcc doesn't like unnamed structs. */ + ACE_TYPES type; /* Type of the ACE. */ + ACE_FLAGS flags; /* Flags describing the ACE. */ + u16 size; /* Size in bytes of the ACE. */ +/* 4*/ ACCESS_MASK mask; /* Access mask associated with the ACE. */ + +/* 8*/ OBJECT_ACE_FLAGS object_flags; /* Flags describing the object ACE. */ +/* 12*/ GUID object_type; +/* 28*/ GUID inherited_object_type; + +/* 44*/ SID sid; /* The SID associated with the ACE. */ +} __attribute__ ((__packed__)) ACCESS_ALLOWED_OBJECT_ACE, + ACCESS_DENIED_OBJECT_ACE, + SYSTEM_AUDIT_OBJECT_ACE, + SYSTEM_ALARM_OBJECT_ACE; + +/* + * An ACL is an access-control list (ACL). + * An ACL starts with an ACL header structure, which specifies the size of + * the ACL and the number of ACEs it contains. The ACL header is followed by + * zero or more access control entries (ACEs). The ACL as well as each ACE + * are aligned on 4-byte boundaries. + */ +typedef struct { + u8 revision; /* Revision of this ACL. */ + u8 alignment1; + u16 size; /* Allocated space in bytes for ACL. Includes this + header, the ACEs and the remaining free space. */ + u16 ace_count; /* Number of ACEs in the ACL. */ + u16 alignment2; +/* sizeof() = 8 bytes */ +} __attribute__ ((__packed__)) ACL; + +/* + * Current constants for ACLs. + */ +typedef enum { + /* Current revision. */ + ACL_REVISION = 2, + ACL_REVISION_DS = 4, + + /* History of revisions. */ + ACL_REVISION1 = 1, + MIN_ACL_REVISION = 2, + ACL_REVISION2 = 2, + ACL_REVISION3 = 3, + ACL_REVISION4 = 4, + MAX_ACL_REVISION = 4, +} ACL_CONSTANTS; + +/* + * The security descriptor control flags (16-bit). 
+ * + * SE_OWNER_DEFAULTED - This boolean flag, when set, indicates that the + * SID pointed to by the Owner field was provided by a + * defaulting mechanism rather than explicitly provided by the + * original provider of the security descriptor. This may + * affect the treatment of the SID with respect to inheritance + * of an owner. + * + * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the + * SID in the Group field was provided by a defaulting mechanism + * rather than explicitly provided by the original provider of + * the security descriptor. This may affect the treatment of + * the SID with respect to inheritance of a primary group. + * + * SE_DACL_PRESENT - This boolean flag, when set, indicates that the + * security descriptor contains a discretionary ACL. If this + * flag is set and the Dacl field of the SECURITY_DESCRIPTOR is + * null, then a null ACL is explicitly being specified. + * + * SE_DACL_DEFAULTED - This boolean flag, when set, indicates that the + * ACL pointed to by the Dacl field was provided by a defaulting + * mechanism rather than explicitly provided by the original + * provider of the security descriptor. This may affect the + * treatment of the ACL with respect to inheritance of an ACL. + * This flag is ignored if the DaclPresent flag is not set. + * + * SE_SACL_PRESENT - This boolean flag, when set, indicates that the + * security descriptor contains a system ACL pointed to by the + * Sacl field. If this flag is set and the Sacl field of the + * SECURITY_DESCRIPTOR is null, then an empty (but present) + * ACL is being specified. + * + * SE_SACL_DEFAULTED - This boolean flag, when set, indicates that the + * ACL pointed to by the Sacl field was provided by a defaulting + * mechanism rather than explicitly provided by the original + * provider of the security descriptor. This may affect the + * treatment of the ACL with respect to inheritance of an ACL. + * This flag is ignored if the SaclPresent flag is not set. 
+ * + * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the + * security descriptor is in self-relative form. In this form, + * all fields of the security descriptor are contiguous in memory + * and all pointer fields are expressed as offsets from the + * beginning of the security descriptor. + */ +typedef enum { + SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001), + SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002), + SE_DACL_PRESENT = const_cpu_to_le16(0x0004), + SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008), + SE_SACL_PRESENT = const_cpu_to_le16(0x0010), + SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020), + SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100), + SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200), + SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400), + SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800), + SE_DACL_PROTECTED = const_cpu_to_le16(0x1000), + SE_SACL_PROTECTED = const_cpu_to_le16(0x2000), + SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000), + SE_SELF_RELATIVE = const_cpu_to_le16(0x8000), +} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_CONTROL; + +/* + * Self-relative security descriptor. Contains the owner and group SIDs as well + * as the sacl and dacl ACLs inside the security descriptor itself. + */ +typedef struct { + u8 revision; /* Revision level of the security descriptor. */ + u8 alignment; + SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of + the descriptor as well as the following fields. */ + u32 owner; /* Byte offset to a SID representing an object's + owner. If this is NULL, no owner SID is present in + the descriptor. */ + u32 group; /* Byte offset to a SID representing an object's + primary group. If this is NULL, no primary group + SID is present in the descriptor. */ + u32 sacl; /* Byte offset to a system ACL. Only valid, if + SE_SACL_PRESENT is set in the control field. If + SE_SACL_PRESENT is set but sacl is NULL, a NULL ACL + is specified. 
*/ + u32 dacl; /* Byte offset to a discretionary ACL. Only valid, if + SE_DACL_PRESENT is set in the control field. If + SE_DACL_PRESENT is set but dacl is NULL, a NULL ACL + (unconditionally granting access) is specified. */ +/* sizeof() = 0x14 bytes */ +} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_RELATIVE; + +/* + * Absolute security descriptor. Does not contain the owner and group SIDs, nor + * the sacl and dacl ACLs inside the security descriptor. Instead, it contains + * pointers to these structures in memory. Obviously, absolute security + * descriptors are only useful for in memory representations of security + * descriptors. On disk, a self-relative security descriptor is used. + */ +typedef struct { + u8 revision; /* Revision level of the security descriptor. */ + u8 alignment; + SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of + the descriptor as well as the following fields. */ + SID *owner; /* Points to a SID representing an object's owner. If + this is NULL, no owner SID is present in the + descriptor. */ + SID *group; /* Points to a SID representing an object's primary + group. If this is NULL, no primary group SID is + present in the descriptor. */ + ACL *sacl; /* Points to a system ACL. Only valid, if + SE_SACL_PRESENT is set in the control field. If + SE_SACL_PRESENT is set but sacl is NULL, a NULL ACL + is specified. */ + ACL *dacl; /* Points to a discretionary ACL. Only valid, if + SE_DACL_PRESENT is set in the control field. If + SE_DACL_PRESENT is set but dacl is NULL, a NULL ACL + (unconditionally granting access) is specified. */ +} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR; + +/* + * Current constants for security descriptors. + */ +typedef enum { + /* Current revision. */ + SECURITY_DESCRIPTOR_REVISION = 1, + SECURITY_DESCRIPTOR_REVISION1 = 1, + + /* The sizes of both the absolute and relative security descriptors is + the same as pointers, at least on ia32 architecture are 32-bit. 
*/ + SECURITY_DESCRIPTOR_MIN_LENGTH = sizeof(SECURITY_DESCRIPTOR), +} SECURITY_DESCRIPTOR_CONSTANTS; + +/* + * Attribute: Security descriptor (0x50). A standard self-relative security + * descriptor. + * + * NOTE: Can be resident or non-resident. + * NOTE: Not used in NTFS 3.0+, as security descriptors are stored centrally + * in FILE_Secure and the correct descriptor is found using the security_id + * from the standard information attribute. + */ +typedef SECURITY_DESCRIPTOR_RELATIVE SECURITY_DESCRIPTOR_ATTR; + +/* + * On NTFS 3.0+, all security descriptors are stored in FILE_Secure. Only one + * referenced instance of each unique security descriptor is stored. + * + * FILE_Secure contains no unnamed data attribute, i.e. it has zero length. It + * does, however, contain two indexes ($SDH and $SII) as well as a named data + * stream ($SDS). + * + * Every unique security descriptor is assigned a unique security identifier + * (security_id, not to be confused with a SID). The security_id is unique for + * the NTFS volume and is used as an index into the $SII index, which maps + * security_ids to the security descriptor's storage location within the $SDS + * data attribute. The $SII index is sorted by ascending security_id. + * + * A simple hash is computed from each security descriptor. This hash is used + * as an index into the $SDH index, which maps security descriptor hashes to + * the security descriptor's storage location within the $SDS data attribute. + * The $SDH index is sorted by security descriptor hash and is stored in a B+ + * tree. When searching $SDH (with the intent of determining whether or not a + * new security descriptor is already present in the $SDS data stream), if a + * matching hash is found, but the security descriptors do not match, the + * search in the $SDH index is continued, searching for a next matching hash. 
+ * + * When a precise match is found, the security_id corresponding to the security + * descriptor in the $SDS attribute is read from the found $SDH index entry and + * is stored in the $STANDARD_INFORMATION attribute of the file/directory to + * which the security descriptor is being applied. The $STANDARD_INFORMATION + * attribute is present in all base mft records (i.e. in all files and + * directories). + * + * If a match is not found, the security descriptor is assigned a new unique + * security_id and is added to the $SDS data attribute. Then, entries + * referencing this security descriptor in the $SDS data attribute are + * added to the $SDH and $SII indexes. + * + * Note: Entries are never deleted from FILE_Secure, even if nothing + * references an entry any more. + */ + +/* + * This header precedes each security descriptor in the $SDS data stream. + * This is also the index entry data part of both the $SII and $SDH indexes. + */ +typedef struct { + u32 hash; /* Hash of the security descriptor. */ + u32 security_id; /* The security_id assigned to the descriptor. */ + u64 offset; /* Byte offset of this entry in the $SDS stream. */ + u32 length; /* Size in bytes of this entry in $SDS stream. */ +} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_HEADER; + +/* + * The $SDS data stream contains the security descriptors, aligned on 16-byte + * boundaries, sorted by security_id in a B+ tree. Security descriptors cannot + * cross 256kib boundaries (this restriction is imposed by the Windows cache + * manager). Each security descriptor is contained in a SDS_ENTRY structure. + * Also, each security descriptor is stored twice in the $SDS stream with a + * fixed offset of 0x40000 bytes (256kib, the Windows cache manager's max size) + * between them; i.e. if a SDS_ENTRY specifies an offset of 0x51d0, then the + * first copy of the security descriptor will be at offset 0x51d0 in the + * $SDS data stream and the second copy will be at offset 0x451d0.
+ */ +typedef struct { +/*Ofs*/ +/* 0 SECURITY_DESCRIPTOR_HEADER; -- Unfolded here as gcc doesn't like + unnamed structs. */ + u32 hash; /* Hash of the security descriptor. */ + u32 security_id; /* The security_id assigned to the descriptor. */ + u64 offset; /* Byte offset of this entry in the $SDS stream. */ + u32 length; /* Size in bytes of this entry in $SDS stream. */ +/* 20*/ SECURITY_DESCRIPTOR_RELATIVE sid; /* The self-relative security + descriptor. */ +} __attribute__ ((__packed__)) SDS_ENTRY; + +/* + * The index entry key used in the $SII index. The collation type is + * COLLATION_NTOFS_ULONG. + */ +typedef struct { + u32 security_id; /* The security_id assigned to the descriptor. */ +} __attribute__ ((__packed__)) SII_INDEX_KEY; + +/* + * The index entry key used in the $SDH index. The keys are sorted first by + * hash and then by security_id. The collation rule is + * COLLATION_NTOFS_SECURITY_HASH. + */ +typedef struct { + u32 hash; /* Hash of the security descriptor. */ + u32 security_id; /* The security_id assigned to the descriptor. */ +} __attribute__ ((__packed__)) SDH_INDEX_KEY; + +/* + * Attribute: Volume name (0x60). + * + * NOTE: Always resident. + * NOTE: Present only in FILE_Volume. + */ +typedef struct { + uchar_t name[0]; /* The name of the volume in Unicode. */ +} __attribute__ ((__packed__)) VOLUME_NAME; + +/* + * Possible flags for the volume (16-bit). + */ +typedef enum { + VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001), + VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002), + VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004), + VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008), + VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), + VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), + VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), + VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f), +} __attribute__ ((__packed__)) VOLUME_FLAGS; + +/* + * Attribute: Volume information (0x70). + * + * NOTE: Always resident. 
+ * NOTE: Present only in FILE_Volume. + * NOTE: Windows 2000 uses NTFS 3.0 while Windows NT4 service pack 6a uses + * NTFS 1.2. I haven't personally seen other values yet. + */ +typedef struct { + u64 reserved; /* Not used (yet?). */ + u8 major_ver; /* Major version of the ntfs format. */ + u8 minor_ver; /* Minor version of the ntfs format. */ + VOLUME_FLAGS flags; /* Bit array of VOLUME_* flags. */ +} __attribute__ ((__packed__)) VOLUME_INFORMATION; + +/* + * Attribute: Data attribute (0x80). + * + * NOTE: Can be resident or non-resident. + * + * Data contents of a file (i.e. the unnamed stream) or of a named stream. + */ +typedef struct { + u8 data[0]; /* The file's data contents. */ +} __attribute__ ((__packed__)) DATA_ATTR; + +/* + * Index header flags (8-bit). + */ +typedef enum { + /* When index header is in an index root attribute: */ + SMALL_INDEX = 0, /* The index is small enough to fit inside the + index root attribute and there is no index + allocation attribute present. */ + LARGE_INDEX = 1, /* The index is too large to fit in the index + root attribute and/or an index allocation + attribute is present. */ + /* + * When index header is in an index block, i.e. is part of index + * allocation attribute: + */ + LEAF_NODE = 0, /* This is a leaf node, i.e. there are no more + nodes branching off it. */ + INDEX_NODE = 1, /* This node indexes other nodes, i.e. is not a + leaf node. */ + NODE_MASK = 1, /* Mask for accessing the *_NODE bits. */ +} __attribute__ ((__packed__)) INDEX_HEADER_FLAGS; + +/* + * This is the header for indexes, describing the INDEX_ENTRY records, which + * follow the INDEX_HEADER. Together the index header and the index entries + * make up a complete index. + * + * IMPORTANT NOTE: The offset, length and size structure members are counted + * relative to the start of the index header structure and not relative to the + * start of the index root or index allocation structures themselves. 
+ */ +typedef struct { + u32 entries_offset; /* Byte offset to first INDEX_ENTRY + aligned to 8-byte boundary. */ + u32 index_length; /* Data size of the index in bytes, + i.e. bytes used from allocated + size, aligned to 8-byte boundary. */ + u32 allocated_size; /* Byte size of this index (block), + multiple of 8 bytes. */ + /* NOTE: For the index root attribute, the above two numbers are always + equal, as the attribute is resident and it is resized as needed. In + the case of the index allocation attribute the attribute is not + resident and hence the allocated_size is a fixed value and must + equal the index_block_size specified by the INDEX_ROOT attribute + corresponding to the INDEX_ALLOCATION attribute this INDEX_BLOCK + belongs to. */ + INDEX_HEADER_FLAGS flags; /* Bit field of INDEX_HEADER_FLAGS. */ + u8 reserved[3]; /* Reserved/align to 8-byte boundary. */ +} __attribute__ ((__packed__)) INDEX_HEADER; + +/* + * Attribute: Index root (0x90). + * + * NOTE: Always resident. + * + * This is followed by a sequence of index entries (INDEX_ENTRY structures) + * as described by the index header. + * + * When a directory is small enough to fit inside the index root then this + * is the only attribute describing the directory. When the directory is too + * large to fit in the index root, on the other hand, two additional attributes + * are present: an index allocation attribute, containing sub-nodes of the B+ + * directory tree (see below), and a bitmap attribute, describing which virtual + * cluster numbers (vcns) in the index allocation attribute are in use by an + * index block. + * + * NOTE: The root directory (FILE_root) contains an entry for itself. Other + * directories do not contain entries for themselves, though. + */ +typedef struct { + ATTR_TYPES type; /* Type of the indexed attribute. Is + $FILE_NAME for directories, zero + for view indexes. No other values + allowed.
*/ + COLLATION_RULES collation_rule; /* Collation rule used to sort the + index entries. If type is $FILE_NAME, + this must be COLLATION_FILE_NAME. */ + u32 index_block_size; /* Size of each index block in bytes (in + the index allocation attribute). */ + u8 clusters_per_index_block; /* Cluster size of each index block (in + the index allocation attribute), when + an index block is >= than a cluster, + otherwise this will be the log of + the size (like how the encoding of + the mft record size and the index + record size found in the boot sector + work). Has to be a power of 2. */ + u8 reserved[3]; /* Reserved/align to 8-byte boundary. */ + INDEX_HEADER index; /* Index header describing the + following index entries. */ +} __attribute__ ((__packed__)) INDEX_ROOT; + +/* + * Attribute: Index allocation (0xa0). + * + * NOTE: Always non-resident (doesn't make sense to be resident anyway!). + * + * This is an array of index blocks. Each index block starts with an + * INDEX_BLOCK structure containing an index header, followed by a sequence of + * index entries (INDEX_ENTRY structures), as described by the INDEX_HEADER. + */ +typedef struct { +/* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */ + NTFS_RECORD_TYPES magic;/* Magic is "INDX". */ + u16 usa_ofs; /* See NTFS_RECORD definition. */ + u16 usa_count; /* See NTFS_RECORD definition. */ + +/* 8*/ s64 lsn; /* $LogFile sequence number of the last + modification of this index block. */ +/* 16*/ VCN index_block_vcn; /* Virtual cluster number of the index block. + If the cluster_size on the volume is <= the + index_block_size of the directory, + index_block_vcn counts in units of clusters, + and in units of sectors otherwise. */ +/* 24*/ INDEX_HEADER index; /* Describes the following index entries. */ +/* sizeof()= 40 (0x28) bytes */ +/* + * When creating the index block, we place the update sequence array at this + * offset, i.e. before we start with the index entries. 
This also makes sense, + * otherwise we could run into problems with the update sequence array + * containing in itself the last two bytes of a sector which would mean that + * multi sector transfer protection wouldn't work. As you can't protect data + * by overwriting it since you then can't get it back... + * When reading use the data from the ntfs record header. + */ +} __attribute__ ((__packed__)) INDEX_BLOCK; + +typedef INDEX_BLOCK INDEX_ALLOCATION; + +/* + * The system file FILE_Extend/$Reparse contains an index named $R listing + * all reparse points on the volume. The index entry keys are as defined + * below. Note, that there is no index data associated with the index entries. + * + * The index entries are sorted by the index key file_id. The collation rule is + * COLLATION_NTOFS_ULONGS. FIXME: Verify whether the reparse_tag is not the + * primary key / is not a key at all. (AIA) + */ +typedef struct { + u32 reparse_tag; /* Reparse point type (inc. flags). */ + MFT_REF file_id; /* Mft record of the file containing the + reparse point attribute. */ +} __attribute__ ((__packed__)) REPARSE_INDEX_KEY; + +/* + * Quota flags (32-bit). + */ +typedef enum { + /* The user quota flags. Names explain meaning. */ + QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001), + QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002), + QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004), + + QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007), + /* Bit mask for user quota flags. */ + + /* These flags are only present in the quota defaults index entry, + i.e. in the entry where owner_id = QUOTA_DEFAULTS_ID. 
*/ + QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010), + QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020), + QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040), + QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080), + QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100), + QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200), + QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400), + QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800), +} QUOTA_FLAGS; + +/* + * The system file FILE_Extend/$Quota contains two indexes $O and $Q. Quotas + * are on a per volume and per user basis. + * + * The $Q index contains one entry for each existing user_id on the volume. The + * index key is the user_id of the user/group owning this quota control entry, + * i.e. the key is the owner_id. The user_id of the owner of a file, i.e. the + * owner_id, is found in the standard information attribute. The collation rule + * for $Q is COLLATION_NTOFS_ULONG. + * + * The $O index contains one entry for each user/group who has been assigned + * a quota on that volume. The index key holds the SID of the user_id the + * entry belongs to, i.e. the owner_id. The collation rule for $O is + * COLLATION_NTOFS_SID. + * + * The $O index entry data is the user_id of the user corresponding to the SID. + * This user_id is used as an index into $Q to find the quota control entry + * associated with the SID. + * + * The $Q index entry data is the quota control entry and is defined below. + */ +typedef struct { + u32 version; /* Currently equals 2. */ + QUOTA_FLAGS flags; /* Flags describing this quota entry. */ + u64 bytes_used; /* How many bytes of the quota are in use. */ + s64 change_time; /* Last time this quota entry was changed. */ + s64 threshold; /* Soft quota (-1 if not limited). */ + s64 limit; /* Hard quota (-1 if not limited). */ + s64 exceeded_time; /* How long the soft quota has been exceeded. 
*/ + SID sid; /* The SID of the user/object associated with + this quota entry. Equals zero for the quota + defaults entry. */ +} __attribute__ ((__packed__)) QUOTA_CONTROL_ENTRY; + +/* + * Predefined owner_id values (32-bit). + */ +typedef enum { + QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000), + QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001), + QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100), +} PREDEFINED_OWNER_IDS; + +/* + * Index entry flags (16-bit). + */ +typedef enum { + INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a sub-node, + i.e. a reference to an index + block in form of a virtual + cluster number (see below). */ + INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last entry in + an index block. The index entry + does not represent a file but it + can point to a sub-node. */ + INDEX_ENTRY_SPACE_FILLER = 0xffff, /* Just to force 16-bit width. */ +} __attribute__ ((__packed__)) INDEX_ENTRY_FLAGS; + +/* + * This is the index entry header (see below). + */ +typedef struct { +/* 0*/ union { + struct { /* Only valid when INDEX_ENTRY_END is not set. */ + MFT_REF indexed_file; /* The mft reference of the file + described by this index + entry. Used for directory + indexes. */ + } __attribute__ ((__packed__)) dir; + struct { /* Used for views/indexes to find the entry's data. */ + u16 data_offset; /* Data byte offset from this + INDEX_ENTRY. Follows the + index key. */ + u16 data_length; /* Data length in bytes. */ + u32 reservedV; /* Reserved (zero). */ + } __attribute__ ((__packed__)) vi; + } __attribute__ ((__packed__)) data; +/* 8*/ u16 length; /* Byte size of this index entry, multiple of + 8-bytes. */ +/* 10*/ u16 key_length; /* Byte size of the key value, which is in the + index entry. It follows field reserved. Not + multiple of 8-bytes. */ +/* 12*/ INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */ +/* 14*/ u16 reserved; /* Reserved/align to 8-byte boundary.
*/ +/* sizeof() = 16 bytes */ +} __attribute__ ((__packed__)) INDEX_ENTRY_HEADER; + +/* + * This is an index entry. A sequence of such entries follows each INDEX_HEADER + * structure. Together they make up a complete index. The index follows either + * an index root attribute or an index allocation attribute. + * + * NOTE: Before NTFS 3.0 only filename attributes were indexed. + */ +typedef struct { +/*Ofs*/ +/* 0 INDEX_ENTRY_HEADER; -- Unfolded here as gcc dislikes unnamed structs. */ + union { + struct { /* Only valid when INDEX_ENTRY_END is not set. */ + MFT_REF indexed_file; /* The mft reference of the file + described by this index + entry. Used for directory + indexes. */ + } __attribute__ ((__packed__)) dir; + struct { /* Used for views/indexes to find the entry's data. */ + u16 data_offset; /* Data byte offset from this + INDEX_ENTRY. Follows the + index key. */ + u16 data_length; /* Data length in bytes. */ + u32 reservedV; /* Reserved (zero). */ + } __attribute__ ((__packed__)) vi; + } __attribute__ ((__packed__)) data; + u16 length; /* Byte size of this index entry, multiple of + 8-bytes. */ + u16 key_length; /* Byte size of the key value, which is in the + index entry. It follows field reserved. Not + multiple of 8-bytes. */ + INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */ + u16 reserved; /* Reserved/align to 8-byte boundary. */ + +/* 16*/ union { /* The key of the indexed attribute. NOTE: Only present + if INDEX_ENTRY_END bit in flags is not set. NOTE: On + NTFS versions before 3.0 the only valid key is the + FILE_NAME_ATTR. On NTFS 3.0+ the following + additional index keys are defined: */ + FILE_NAME_ATTR file_name;/* $I30 index in directories. */ + SII_INDEX_KEY sii; /* $SII index in $Secure. */ + SDH_INDEX_KEY sdh; /* $SDH index in $Secure. */ + GUID object_id; /* $O index in FILE_Extend/$ObjId: The + object_id of the mft record found in + the data part of the index. 
*/ + REPARSE_INDEX_KEY reparse; /* $R index in + FILE_Extend/$Reparse. */ + SID sid; /* $O index in FILE_Extend/$Quota: + SID of the owner of the user_id. */ + u32 owner_id; /* $Q index in FILE_Extend/$Quota: + user_id of the owner of the quota + control entry in the data part of + the index. */ + } __attribute__ ((__packed__)) key; + /* The (optional) index data is inserted here when creating. */ + // VCN vcn; /* If INDEX_ENTRY_NODE bit in flags is set, the last + // eight bytes of this index entry contain the virtual + // cluster number of the index block that holds the + // entries immediately preceding the current entry (the + // vcn references the corresponding cluster in the data + // of the non-resident index allocation attribute). If + // the key_length is zero, then the vcn immediately + // follows the INDEX_ENTRY_HEADER. Regardless of + // key_length, the address of the 8-byte boundary + // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by + // (char*)ie + le16_to_cpu(ie->length) - sizeof(VCN), + // where sizeof(VCN) can be hardcoded as 8 if wanted. */ +} __attribute__ ((__packed__)) INDEX_ENTRY; + +/* + * Attribute: Bitmap (0xb0). + * + * Contains an array of bits (aka a bitfield). + * + * When used in conjunction with the index allocation attribute, each bit + * corresponds to one index block within the index allocation attribute. Thus + * the number of bits in the bitmap * index block size / cluster size is the + * number of clusters in the index allocation attribute. + */ +typedef struct { + u8 bitmap[0]; /* Array of bits. */ +} __attribute__ ((__packed__)) BITMAP_ATTR; + +/* + * The reparse point tag defines the type of the reparse point. It also + * includes several flags, which further describe the reparse point. + * + * The reparse point tag is an unsigned 32-bit value divided in three parts: + * + * 1. The least significant 16 bits (i.e. bits 0 to 15) specify the type of + * the reparse point. + * 2. The 13 bits after this (i.e.
bits 16 to 28) are reserved for future use. + * 3. The most significant three bits are flags describing the reparse point. + * They are defined as follows: + * bit 29: Name surrogate bit. If set, the filename is an alias for + * another object in the system. + * bit 30: High-latency bit. If set, accessing the first byte of data will + * be slow. (E.g. the data is stored on a tape drive.) + * bit 31: Microsoft bit. If set, the tag is owned by Microsoft. User + * defined tags have to use zero here. + */ +typedef enum { + IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000), + IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000), + IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000), + + IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000), + IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001), + IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001), + + IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005), + IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006), + IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007), + IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008), + + IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003), + + IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004), + + IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000), + + IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff), +} PREDEFINED_REPARSE_TAGS; + +/* + * Attribute: Reparse point (0xc0). + * + * NOTE: Can be resident or non-resident. + */ +typedef struct { + u32 reparse_tag; /* Reparse point type (inc. flags). */ + u16 reparse_data_length; /* Byte size of reparse data. */ + u16 reserved; /* Align to 8-byte boundary. */ + u8 reparse_data[0]; /* Meaning depends on reparse_tag. */ +} __attribute__ ((__packed__)) REPARSE_POINT; + +/* + * Attribute: Extended attribute (EA) information (0xd0). + * + * NOTE: Always resident. (Is this true???) + */ +typedef struct { + u16 ea_length; /* Byte size of the packed extended + attributes. 
*/ + u16 need_ea_count; /* The number of extended attributes which have + the NEED_EA bit set. */ + u32 ea_query_length; /* Byte size of the buffer required to query + the extended attributes when calling + ZwQueryEaFile() in Windows NT/2k. I.e. the + byte size of the unpacked extended + attributes. */ +} __attribute__ ((__packed__)) EA_INFORMATION; + +/* + * Extended attribute flags (8-bit). + */ +typedef enum { + NEED_EA = 0x80, +} __attribute__ ((__packed__)) EA_FLAGS; + +/* + * Attribute: Extended attribute (EA) (0xe0). + * + * NOTE: Always non-resident. (Is this true?) + * + * Like the attribute list and the index buffer list, the EA attribute value is + * a sequence of EA_ATTR variable length records. + * + * FIXME: It appears weird that the EA name is not unicode. Is it true? + */ +typedef struct { + u32 next_entry_offset; /* Offset to the next EA_ATTR. */ + EA_FLAGS flags; /* Flags describing the EA. */ + u8 ea_name_length; /* Length of the name of the EA in bytes. */ + u16 ea_value_length; /* Byte size of the EA's value. */ + u8 ea_name[0]; /* Name of the EA. */ + u8 ea_value[0]; /* The value of the EA. Immediately follows + the name. */ +} __attribute__ ((__packed__)) EA_ATTR; + +/* + * Attribute: Property set (0xf0). + * + * Intended to support Native Structure Storage (NSS) - a feature removed from + * NTFS 3.0 during beta testing. + */ +typedef struct { + /* Irrelevant as feature unused. */ +} __attribute__ ((__packed__)) PROPERTY_SET; + +/* + * Attribute: Logged utility stream (0x100). + * + * NOTE: Can be resident or non-resident. + * + * Operations on this attribute are logged to the journal ($LogFile) like + * normal metadata changes. + * + * Used by the Encrypting File System (EFS). All encrypted files have this + * attribute with the name $EFS. + */ +typedef struct { + /* Can be anything the creator chooses. */ + /* EFS uses it as follows: */ + // FIXME: Type this info, verifying it along the way. 
(AIA) +} __attribute__ ((__packed__)) LOGGED_UTILITY_STREAM, EFS_ATTR; + +#endif /* _LINUX_NTFS_LAYOUT_H */ + diff -urN linux-2.4.24-vanilla/fs/ntfs/macros.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/macros.h --- linux-2.4.24-vanilla/fs/ntfs/macros.h 2001-09-08 20:24:40.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/macros.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,43 +0,0 @@ -/* - * macros.h - * - * Copyright (C) 1995 Martin von Löwis - * Copyright (C) 1996 Régis Duchesne - * Copyright (c) 2001 Anton Altaparmakov - */ -#include -#include -#include - -#define NTFS_FD(vol) ((vol)->u.fd) - -#define NTFS_SB(vol) ((struct super_block*)(vol)->sb) -#define NTFS_SB2VOL(sb) (&(sb)->u.ntfs_sb) -#define NTFS_INO2VOL(ino) (&((ino)->i_sb->u.ntfs_sb)) -#define NTFS_LINO2NINO(ino) ((struct ntfs_inode_info*)(&((ino)->u.ntfs_i))) -static inline struct inode *VFS_I(struct ntfs_inode_info *ntfs_ino) -{ - struct inode *i = (struct inode*)((char*)ntfs_ino - - ((char*)&(((struct inode*)NULL)->u.ntfs_i) - - (char*)NULL)); -#ifdef DEBUG - if ((char*)NTFS_LINO2NINO(i) != (char*)ntfs_ino) - BUG(); -#endif - return i; -} - -#define IS_MAGIC(a,b) (*(int*)(a) == *(int*)(b)) -#define IS_MFT_RECORD(a) IS_MAGIC((a),"FILE") -#define IS_INDEX_RECORD(a) IS_MAGIC((a),"INDX") - -/* 'NTFS' in little endian */ -#define NTFS_SUPER_MAGIC 0x5346544E - -#define NTFS_AFLAG_RO 1 -#define NTFS_AFLAG_HIDDEN 2 -#define NTFS_AFLAG_SYSTEM 4 -#define NTFS_AFLAG_ARCHIVE 20 -#define NTFS_AFLAG_COMPRESSED 0x800 -#define NTFS_AFLAG_DIR 0x10000000 - diff -urN linux-2.4.24-vanilla/fs/ntfs/malloc.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/malloc.h --- linux-2.4.24-vanilla/fs/ntfs/malloc.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/malloc.h 2004-01-21 14:31:43.000000000 +0000 @@ -0,0 +1,64 @@ +/* + * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. 
+ * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_MALLOC_H +#define _LINUX_NTFS_MALLOC_H + +#include +#include + +/** + * ntfs_malloc_nofs - allocate memory in multiples of pages + * @size: number of bytes to allocate (must not be zero) + * + * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and + * returns a pointer to the allocated memory. Free it with ntfs_free(). + * + * If there was insufficient memory to complete the request, return NULL. + */ +static inline void *ntfs_malloc_nofs(unsigned long size) +{ + if (likely(size <= PAGE_SIZE)) { + if (likely(size)) { + /* kmalloc() has per-CPU caches so is faster for now.
*/ + return kmalloc(PAGE_SIZE, GFP_NOFS); + /* return (void *)__get_free_page(GFP_NOFS | + __GFP_HIGHMEM); */ + } + BUG(); /* A zero sized allocation is a caller bug. */ + } + if (likely(size >> PAGE_SHIFT < num_physpages)) + return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); + return NULL; +} + +static inline void ntfs_free(void *addr) +{ + if (likely(((unsigned long)addr < VMALLOC_START) || + ((unsigned long)addr >= VMALLOC_END ))) { + kfree(addr); /* free_page((unsigned long)addr); */ + return; + } + vfree(addr); /* @addr lies within the vmalloc address range. */ +} + +#endif /* _LINUX_NTFS_MALLOC_H */ + diff -urN linux-2.4.24-vanilla/fs/ntfs/mft.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/mft.c --- linux-2.4.24-vanilla/fs/ntfs/mft.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/mft.c 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,433 @@ +/** + * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include
+#include
+
+#include "ntfs.h"
+
+/**
+ * __format_mft_record - initialize an empty mft record
+ * @m:		mapped, pinned and locked for writing mft record
+ * @size:	size of the mft record in bytes (all @size bytes are zeroed)
+ * @rec_no:	mft record number / inode number (currently unused)
+ *
+ * Private function to initialize an empty mft record. Use one of the two
+ * provided format_mft_record() functions instead.
+ */
+static void __format_mft_record(MFT_RECORD *m, const int size,
+		const unsigned long rec_no)
+{
+	ATTR_RECORD *a;
+
+	memset(m, 0, size);
+	m->magic = magic_FILE;
+	/* The usa follows the MFT_RECORD struct, aligned to 2 bytes. */
+	m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1);
+	m->usa_count = cpu_to_le16(size / NTFS_BLOCK_SIZE + 1);
+	/* Set the update sequence number (first u16 of the usa) to 1. */
+	*(u16*)((char*)m + ((sizeof(MFT_RECORD) + 1) & ~1)) = cpu_to_le16(1);
+	m->lsn = cpu_to_le64(0LL);
+	m->sequence_number = cpu_to_le16(1);
+	m->link_count = cpu_to_le16(0);
+	/* Attributes start after the usa, aligned to 8-byte boundary. */
+	m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) +
+			(le16_to_cpu(m->usa_count) << 1) + 7) & ~7);
+	m->flags = cpu_to_le16(0);
+	/*
+	 * Using attrs_offset plus eight bytes (for the termination attribute),
+	 * aligned to 8-byte boundary.
+	 */
+	m->bytes_in_use = cpu_to_le32((le16_to_cpu(m->attrs_offset) + 8 + 7) &
+			~7);
+	m->bytes_allocated = cpu_to_le32(size);
+	m->base_mft_record = cpu_to_le64((MFT_REF)0);
+	m->next_attr_instance = cpu_to_le16(0);
+	a = (ATTR_RECORD*)((char*)m + le16_to_cpu(m->attrs_offset));
+	a->type = AT_END;
+	a->length = cpu_to_le32(0);
+}
+
+/**
+ * format_mft_record - initialize an empty mft record
+ * @ni:		ntfs inode of mft record
+ * @mft_rec:	mapped, pinned and locked mft record (optional)
+ *
+ * Initialize an empty mft record. This is used when extending the MFT.
+ *
+ * If @mft_rec is NULL, we call map_mft_record() to obtain the
+ * record and we unmap it again when finished.
+ *
+ * We return 0 on success or the -errno from map_mft_record() on error.
+ */
+int format_mft_record(ntfs_inode *ni, MFT_RECORD *mft_rec)
+{
+	MFT_RECORD *m;
+
+	if (mft_rec)
+		m = mft_rec;
+	else {
+		m = map_mft_record(ni);
+		if (IS_ERR(m))
+			return PTR_ERR(m);
+	}
+	__format_mft_record(m, ni->vol->mft_record_size, ni->mft_no);
+	if (!mft_rec) {
+		// FIXME: Need to set the mft record dirty!
+		unmap_mft_record(ni);
+	}
+	return 0;
+}
+
+/**
+ * ntfs_readpage - external declaration, function is in fs/ntfs/aops.c
+ */
+extern int ntfs_readpage(struct file *, struct page *);
+
+/**
+ * ntfs_mft_aops - address space operations for access to $MFT
+ *
+ * Address space operations for access to $MFT. This allows us to simply use
+ * ntfs_map_page() in map_mft_record_page().
+ */
+struct address_space_operations ntfs_mft_aops = {
+	.readpage	= ntfs_readpage,	/* Fill page with data. */
+	.sync_page	= block_sync_page,	/* Currently, just unplugs the
+						   disk request queue. */
+};
+
+/**
+ * map_mft_record_page - map the page in which a specific mft record resides
+ * @ni:		ntfs inode whose mft record page to map
+ *
+ * This maps the page in which the mft record of the ntfs inode @ni is situated
+ * and returns a pointer to the mft record within the mapped page.
+ *
+ * Return value needs to be checked with IS_ERR() and if that is true PTR_ERR()
+ * contains the negative error code returned.
+ */
+static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
+{
+	ntfs_volume *vol = ni->vol;
+	struct inode *mft_vi = vol->mft_ino;
+	struct page *page;
+	unsigned long index, ofs, end_index;
+
+	BUG_ON(ni->page);
+	/*
+	 * The index into the page cache and the offset within the page cache
+	 * page of the wanted mft record. FIXME: We need to check for
+	 * overflowing the unsigned long, but I don't think we would ever get
+	 * here if the volume was that big...
+	 */
+	index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
+	ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+
+	/* The maximum valid index into the page cache for $MFT's data. */
+	end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT;
+
+	/* If the wanted index is out of bounds the mft record doesn't exist. */
+	if (unlikely(index >= end_index)) {
+		if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) <
+				ofs + vol->mft_record_size) {
+			page = ERR_PTR(-ENOENT);
+			goto err_out;
+		}
+	}
+	/* Read, map, and pin the page. */
+	page = ntfs_map_page(mft_vi->i_mapping, index);
+	if (likely(!IS_ERR(page))) {
+		ni->page = page;
+		ni->page_ofs = ofs;
+		return page_address(page) + ofs;
+	}
+err_out:
+	ni->page = NULL;
+	ni->page_ofs = 0;
+	ntfs_error(vol->sb, "Failed with error code %ld.", -PTR_ERR(page));
+	return (void*)page;
+}
+
+/**
+ * map_mft_record - map, pin and lock an mft record
+ * @ni:		ntfs inode whose MFT record to map
+ *
+ * First, take the mrec_lock semaphore. We might now be sleeping, while waiting
+ * for the semaphore if it was already locked by someone else.
+ *
+ * The page of the record is mapped using map_mft_record_page() before being
+ * returned to the caller.
+ *
+ * This in turn uses ntfs_map_page() to get the page containing the wanted mft
+ * record (it in turn calls read_cache_page() which reads it in from disk if
+ * necessary, increments the use count on the page so that it cannot disappear
+ * under us and returns a reference to the page cache page).
+ *
+ * If read_cache_page() invokes ntfs_readpage() to load the page from disk, it
+ * sets PG_locked and clears PG_uptodate on the page. Once I/O has completed
+ * and the post-read mst fixups on each mft record in the page have been
+ * performed, the page gets PG_uptodate set and PG_locked cleared (this is done
+ * in our asynchronous I/O completion handler end_buffer_read_mft_async()).
+ * ntfs_map_page() waits for PG_locked to become clear and checks if
+ * PG_uptodate is set and returns an error code if not. This provides
+ * sufficient protection against races when reading/using the page.
+ *
+ * However there is the write mapping to think about. Doing the above described
+ * checking here will be fine, because when initiating the write we will set
+ * PG_locked and clear PG_uptodate making sure nobody is touching the page
+ * contents. Doing the locking this way means that the commit to disk code in
+ * the page cache code paths is automatically sufficiently locked with us as
+ * we will not touch a page that has been locked or is not uptodate. The only
+ * locking problem then is them locking the page while we are accessing it.
+ *
+ * So that code will end up having to own the mrec_lock of all mft
+ * records/inodes present in the page before I/O can proceed. In that case we
+ * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be
+ * accessing anything without owning the mrec_lock semaphore. But we do need
+ * to use them because of the read_cache_page() invocation and the code becomes
+ * so much simpler this way that it is well worth it.
+ *
+ * The mft record is now ours and we return a pointer to it. You need to check
+ * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return
+ * the error code.
+ *
+ * NOTE: Caller is responsible for setting the mft record dirty before calling
+ * unmap_mft_record(). This is obviously only necessary if the caller really
+ * modified the mft record...
+ * Q: Do we want to recycle one of the VFS inode state bits instead?
+ * A: No, the inode ones mean we want to change the mft record, not we want to
+ * write it out.
+ */
+MFT_RECORD *map_mft_record(ntfs_inode *ni)
+{
+	MFT_RECORD *m;
+
+	ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
+
+	/* Make sure the ntfs inode doesn't go away. */
+	atomic_inc(&ni->count);
+
+	/* Serialize access to this mft record. */
+	down(&ni->mrec_lock);
+
+	m = map_mft_record_page(ni);
+	if (likely(!IS_ERR(m)))
+		return m;
+
+	up(&ni->mrec_lock);
+	atomic_dec(&ni->count);
+	ntfs_error(ni->vol->sb, "Failed with error code %ld.", -PTR_ERR(m));
+	return m;
+}
+
+/**
+ * unmap_mft_record_page - unmap the page in which a specific mft record resides
+ * @ni:		ntfs inode whose mft record page to unmap
+ *
+ * This unmaps the page in which the mft record of the ntfs inode @ni is
+ * situated and returns. This is a NOOP if highmem is not configured.
+ *
+ * The unmap happens via ntfs_unmap_page() which in turn decrements the use
+ * count on the page thus releasing it from the pinned state.
+ *
+ * We do not actually unmap the page from memory of course, as that will be
+ * done by the page cache code itself when memory pressure increases or
+ * whatever.
+ */
+static inline void unmap_mft_record_page(ntfs_inode *ni)
+{
+	BUG_ON(!ni->page);
+
+	// TODO: If dirty, blah...
+	ntfs_unmap_page(ni->page);
+	ni->page = NULL;
+	ni->page_ofs = 0;
+	return;
+}
+
+/**
+ * unmap_mft_record - release a mapped mft record
+ * @ni:		ntfs inode whose MFT record to unmap
+ *
+ * We release the page mapping and the mrec_lock mutex which unmaps the mft
+ * record and releases it for others to get hold of. We also release the ntfs
+ * inode by decrementing the ntfs inode reference count.
+ *
+ * NOTE: If caller has modified the mft record, it is imperative to set the mft
+ * record dirty BEFORE calling unmap_mft_record().
+ */
+void unmap_mft_record(ntfs_inode *ni)
+{
+	struct page *page = ni->page;
+
+	BUG_ON(!page);
+
+	ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
+
+	unmap_mft_record_page(ni);
+	up(&ni->mrec_lock);
+	atomic_dec(&ni->count);
+	/*
+	 * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to
+	 * ntfs_clear_extent_inode() in the extent inode case, and to the
+	 * caller in the non-extent, yet pure ntfs inode case, to do the actual
+	 * tear down of all structures and freeing of all allocated memory.
+	 */
+	return;
+}
+
+/**
+ * map_extent_mft_record - load an extent inode and attach it to its base
+ * @base_ni:	base ntfs inode
+ * @mref:	mft reference of the extent inode to load (in little endian)
+ * @ntfs_ino:	on successful return, pointer to the ntfs_inode structure
+ *
+ * Load the extent mft record @mref and attach it to its base inode @base_ni.
+ * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise
+ * PTR_ERR(result) gives the negative error code.
+ *
+ * On successful return, @ntfs_ino contains a pointer to the ntfs_inode
+ * structure of the mapped extent inode.
+ */
+MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
+		ntfs_inode **ntfs_ino)
+{
+	MFT_RECORD *m;
+	ntfs_inode *ni = NULL;
+	ntfs_inode **extent_nis = NULL;
+	int i;
+	unsigned long mft_no = MREF_LE(mref);
+	u16 seq_no = MSEQNO_LE(mref);
+	BOOL destroy_ni = FALSE;
+
+	ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).",
+			mft_no, base_ni->mft_no);
+	/* Make sure the base ntfs inode doesn't go away. */
+	atomic_inc(&base_ni->count);
+	/*
+	 * Check if this extent inode has already been added to the base inode,
+	 * in which case just return it. If not found, add it to the base
+	 * inode before returning it.
+	 */
+	down(&base_ni->extent_lock);
+	if (base_ni->nr_extents > 0) {
+		extent_nis = base_ni->ext.extent_ntfs_inos;
+		for (i = 0; i < base_ni->nr_extents; i++) {
+			if (mft_no != extent_nis[i]->mft_no)
+				continue;
+			ni = extent_nis[i];
+			/* Make sure the ntfs inode doesn't go away. */
+			atomic_inc(&ni->count);
+			break;
+		}
+	}
+	if (likely(ni != NULL)) {
+		up(&base_ni->extent_lock);
+		atomic_dec(&base_ni->count);
+		/* We found the record; just have to map and return it. */
+		m = map_mft_record(ni);
+		/* Drop ours; map_mft_record() took its own on success. */
+		atomic_dec(&ni->count);
+		if (likely(!IS_ERR(m))) {
+			/* Verify the sequence number. */
+			if (likely(le16_to_cpu(m->sequence_number) == seq_no)) {
+				ntfs_debug("Done 1.");
+				*ntfs_ino = ni;
+				return m;
+			}
+			unmap_mft_record(ni);
+			ntfs_error(base_ni->vol->sb, "Found stale extent mft "
+					"reference! Corrupt file system. "
+					"Run chkdsk.");
+			return ERR_PTR(-EIO);
+		}
+map_err_out:
+		ntfs_error(base_ni->vol->sb, "Failed to map extent "
+				"mft record, error code %ld.", -PTR_ERR(m));
+		return m;
+	}
+	/* Record wasn't there. Get a new ntfs inode and initialize it. */
+	ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
+	if (unlikely(!ni)) {
+		up(&base_ni->extent_lock);
+		atomic_dec(&base_ni->count);
+		return ERR_PTR(-ENOMEM);
+	}
+	ni->vol = base_ni->vol;
+	ni->seq_no = seq_no;
+	ni->nr_extents = -1;
+	ni->ext.base_ntfs_ino = base_ni;
+	/* Now map the record. */
+	m = map_mft_record(ni);
+	if (unlikely(IS_ERR(m))) {
+		up(&base_ni->extent_lock);
+		atomic_dec(&base_ni->count);
+		ntfs_clear_extent_inode(ni);
+		goto map_err_out;	/* Label is in the branch above. */
+	}
+	/* Verify the sequence number. */
+	if (unlikely(le16_to_cpu(m->sequence_number) != seq_no)) {
+		ntfs_error(base_ni->vol->sb, "Found stale extent mft "
+				"reference! Corrupt file system. Run chkdsk.");
+		destroy_ni = TRUE;
+		m = ERR_PTR(-EIO);
+		goto unm_err_out;
+	}
+	/* Attach extent inode to base inode, growing the array 4 at a time. */
+	if (!(base_ni->nr_extents & 3)) {
+		ntfs_inode **tmp;
+		int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
+
+		tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS);
+		if (unlikely(!tmp)) {
+			ntfs_error(base_ni->vol->sb, "Failed to allocate "
+					"internal buffer.");
+			destroy_ni = TRUE;
+			m = ERR_PTR(-ENOMEM);
+			goto unm_err_out;
+		}
+		if (base_ni->ext.extent_ntfs_inos) {
+			memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size -
+					4 * sizeof(ntfs_inode *));
+			kfree(base_ni->ext.extent_ntfs_inos);
+		}
+		base_ni->ext.extent_ntfs_inos = tmp;
+	}
+	base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
+	up(&base_ni->extent_lock);
+	atomic_dec(&base_ni->count);
+	ntfs_debug("Done 2.");
+	*ntfs_ino = ni;
+	return m;
+unm_err_out:
+	unmap_mft_record(ni);
+	up(&base_ni->extent_lock);
+	atomic_dec(&base_ni->count);
+	/*
+	 * If the extent inode was not attached to the base inode we need to
+	 * release it or we will leak memory.
+	 */
+	if (destroy_ni)
+		ntfs_clear_extent_inode(ni);
+	return m;
+}
+
diff -urN linux-2.4.24-vanilla/fs/ntfs/mft.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/mft.h
--- linux-2.4.24-vanilla/fs/ntfs/mft.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/mft.h	2004-01-21 14:31:43.000000000 +0000
@@ -0,0 +1,61 @@
+/*
+ * mft.h - Defines for mft record handling in NTFS Linux kernel driver.
+ *	   Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LINUX_NTFS_MFT_H
+#define _LINUX_NTFS_MFT_H
+
+#include
+
+#include "inode.h"
+
+extern int format_mft_record(ntfs_inode *ni, MFT_RECORD *m);
+//extern int format_mft_record2(struct super_block *vfs_sb,
+//		const unsigned long inum, MFT_RECORD *m);
+
+extern MFT_RECORD *map_mft_record(ntfs_inode *ni);
+extern void unmap_mft_record(ntfs_inode *ni);
+
+extern MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
+		ntfs_inode **ntfs_ino);
+
+static inline void unmap_extent_mft_record(ntfs_inode *ni)
+{
+	unmap_mft_record(ni);	/* Extent records simply forward here. */
+	return;
+}
+
+/**
+ * flush_dcache_mft_record_page - flush_dcache_page() for mft records
+ * @ni:		ntfs inode structure of mft record
+ *
+ * Call flush_dcache_page() for @ni->page, where the mft record of @ni resides.
+ *
+ * This must be called every time an mft record is modified, just after the
+ * modification.
+ */
+static inline void flush_dcache_mft_record_page(ntfs_inode *ni)
+{
+	flush_dcache_page(ni->page);
+}
+
+#endif /* _LINUX_NTFS_MFT_H */
+
diff -urN linux-2.4.24-vanilla/fs/ntfs/mst.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/mst.c
--- linux-2.4.24-vanilla/fs/ntfs/mst.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/mst.c	2004-01-21 14:28:25.000000000 +0000
@@ -0,0 +1,202 @@
+/*
+ * mst.c - NTFS multi sector transfer protection handling code. Part of the
+ *	   Linux-NTFS project.
+ *
+ * Copyright (c) 2001 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "ntfs.h"
+
+/**
+ * post_read_mst_fixup - deprotect multi sector transfer protected data
+ * @b:		pointer to the data to deprotect
+ * @size:	size in bytes of @b
+ *
+ * Perform the necessary post read multi sector transfer fixup and detect the
+ * presence of incomplete multi sector transfers. - In that case, overwrite the
+ * magic of the ntfs record header being processed with "BAAD" (in memory only!)
+ * and abort processing.
+ *
+ * Return 0 on success and -EINVAL on error ("BAAD" magic will be present).
+ *
+ * NOTE: We consider the absence / invalidity of an update sequence array to
+ * mean that the structure is not protected at all and hence doesn't need to
+ * be fixed up. Thus, we return success and not failure in this case. This is
+ * in contrast to pre_write_mst_fixup(), see below.
+ */
+int post_read_mst_fixup(NTFS_RECORD *b, const u32 size)
+{
+	u16 usa_ofs, usa_count, usn;
+	u16 *usa_pos, *data_pos;
+
+	/* Setup the variables. */
+	usa_ofs = le16_to_cpu(b->usa_ofs);
+	/* Decrement usa_count to get number of fixups. */
+	usa_count = le16_to_cpu(b->usa_count) - 1;
+	/* Size, alignment and usa bounds (usn + usa_count u16s) checks. */
+	if (size & (NTFS_BLOCK_SIZE - 1) ||
+			usa_ofs & 1 ||
+			usa_ofs + ((usa_count + 1) * 2) > size ||
+			(size >> NTFS_BLOCK_SIZE_BITS) != usa_count)
+		return 0;
+	/* Position of usn in update sequence array. */
+	usa_pos = (u16*)b + usa_ofs/sizeof(u16);
+	/*
+	 * The update sequence number which has to be equal to each of the
+	 * u16 values before they are fixed up. Note no need to care for
+	 * endianness since we are comparing and moving data for on disk
+	 * structures which means the data is consistent. - If it is
+	 * consistently the wrong endianness it doesn't make any difference.
+	 */
+	usn = *usa_pos;
+	/*
+	 * Position in protected data of first u16 that needs fixing up.
+	 */
+	data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1;
+	/*
+	 * Check for incomplete multi sector transfer(s).
+	 */
+	while (usa_count--) {
+		if (*data_pos != usn) {
+			/*
+			 * Incomplete multi sector transfer detected! )-:
+			 * Set the magic to "BAAD" and return failure.
+			 * Note that magic_BAAD is already converted to le32.
+			 */
+			b->magic = magic_BAAD;
+			return -EINVAL;
+		}
+		data_pos += NTFS_BLOCK_SIZE/sizeof(u16);
+	}
+	/* Re-setup the variables. */
+	usa_count = le16_to_cpu(b->usa_count) - 1;
+	data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1;
+	/* Fixup all sectors. */
+	while (usa_count--) {
+		/*
+		 * Increment position in usa and restore original data from
+		 * the usa into the data buffer.
+		 */
+		*data_pos = *(++usa_pos);
+		/* Increment position in data as well. */
+		data_pos += NTFS_BLOCK_SIZE/sizeof(u16);
+	}
+	return 0;
+}
+
+/**
+ * pre_write_mst_fixup - apply multi sector transfer protection
+ * @b:		pointer to the data to protect
+ * @size:	size in bytes of @b
+ *
+ * Perform the necessary pre write multi sector transfer fixup on the data
+ * pointed to by @b of @size.
+ *
+ * Return 0 if fixup applied (success) or -EINVAL if no fixup was performed
+ * (assumed not needed). This is in contrast to post_read_mst_fixup() above.
+ *
+ * NOTE: We consider the absence / invalidity of an update sequence array to
+ * mean that the structure is not subject to protection and hence doesn't need
+ * to be fixed up. This means that you have to create a valid update sequence
+ * array header in the ntfs record before calling this function, otherwise it
+ * will fail (the header needs to contain the position of the update sequence
+ * array together with the number of elements in the array). You also need to
+ * initialise the update sequence number before calling this function
+ * otherwise a random word will be used (whatever was in the record at that
+ * position at that time).
+ */
+int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size)
+{
+	u16 usa_ofs, usa_count, usn;
+	u16 *usa_pos, *data_pos;
+
+	/* Sanity check + only fixup if it makes sense. */
+	if (!b || is_baad_record(b->magic) || is_hole_record(b->magic))
+		return -EINVAL;
+	/* Setup the variables. */
+	usa_ofs = le16_to_cpu(b->usa_ofs);
+	/* Decrement usa_count to get number of fixups. */
+	usa_count = le16_to_cpu(b->usa_count) - 1;
+	/* Size, alignment and usa bounds (usn + usa_count u16s) checks. */
+	if (size & (NTFS_BLOCK_SIZE - 1) ||
+			usa_ofs & 1 ||
+			usa_ofs + ((usa_count + 1) * 2) > size ||
+			(size >> NTFS_BLOCK_SIZE_BITS) != usa_count)
+		return -EINVAL;
+	/* Position of usn in update sequence array. */
+	usa_pos = (u16*)((u8*)b + usa_ofs);
+	/*
+	 * Cyclically increment the update sequence number
+	 * (skipping 0 and -1, i.e. 0xffff).
+	 */
+	usn = le16_to_cpup(usa_pos) + 1;
+	if (usn == 0xffff || !usn)
+		usn = 1;
+	usn = cpu_to_le16(usn);
+	*usa_pos = usn;
+	/* Position in data of first u16 that needs fixing up. */
+	data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1;
+	/* Fixup all sectors. */
+	while (usa_count--) {
+		/*
+		 * Increment the position in the usa and save the
+		 * original data from the data buffer into the usa.
+		 */
+		*(++usa_pos) = *data_pos;
+		/* Apply fixup to data. */
+		*data_pos = usn;
+		/* Increment position in data as well. */
+		data_pos += NTFS_BLOCK_SIZE/sizeof(u16);
+	}
+	return 0;
+}
+
+/**
+ * post_write_mst_fixup - fast deprotect multi sector transfer protected data
+ * @b:		pointer to the data to deprotect
+ *
+ * Perform the necessary post write multi sector transfer fixup, not checking
+ * for any errors, because we assume we have just used pre_write_mst_fixup(),
+ * thus the data will be fine or we would never have gotten here.
+ */
+void post_write_mst_fixup(NTFS_RECORD *b)
+{
+	u16 *usa_pos, *data_pos;
+
+	u16 usa_ofs = le16_to_cpu(b->usa_ofs);
+	u16 usa_count = le16_to_cpu(b->usa_count) - 1;
+
+	/* Position of usn in update sequence array. */
+	usa_pos = (u16*)b + usa_ofs/sizeof(u16);
+
+	/* Position in protected data of first u16 that needs fixing up. */
+	data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1;
+
+	/* Fixup all sectors. */
+	while (usa_count--) {
+		/*
+		 * Increment position in usa and restore original data from
+		 * the usa into the data buffer.
+		 */
+		*data_pos = *(++usa_pos);
+
+		/* Increment position in data as well. */
+		data_pos += NTFS_BLOCK_SIZE/sizeof(u16);
+	}
+}
+
diff -urN linux-2.4.24-vanilla/fs/ntfs/namei.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/namei.c
--- linux-2.4.24-vanilla/fs/ntfs/namei.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/namei.c	2004-01-21 14:28:25.000000000 +0000
@@ -0,0 +1,298 @@
+/*
+ * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS
+ *	     project.
+ *
+ * Copyright (c) 2001-2003 Anton Altaparmakov
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include
+#include
+#include
+#include
+
+#include "ntfs.h"
+#include "dir.h"
+
+/**
+ * ntfs_lookup - find the inode represented by a dentry in a directory inode
+ * @dir_ino:	directory inode in which to look for the inode
+ * @dent:	dentry representing the inode to look for
+ *
+ * In short, ntfs_lookup() looks for the inode represented by the dentry @dent
+ * in the directory inode @dir_ino and if found attaches the inode to the
+ * dentry @dent.
+ *
+ * In more detail, the dentry @dent specifies which inode to look for by
+ * supplying the name of the inode in @dent->d_name.name. ntfs_lookup()
+ * converts the name to Unicode and walks the contents of the directory inode
+ * @dir_ino looking for the converted Unicode name. If the name is found in the
+ * directory, the corresponding inode is loaded by calling ntfs_iget() on its
+ * inode number and the inode is associated with the dentry @dent via a call to
+ * d_add().
+ *
+ * If the name is not found in the directory, a NULL inode is inserted into the
+ * dentry @dent. The dentry is then termed a negative dentry.
+ *
+ * Only if an actual error occurs, do we return an error via ERR_PTR().
+ *
+ * In order to handle the case insensitivity issues of NTFS with regards to the
+ * dcache and the dcache requiring only one dentry per directory, we deal with
+ * dentry aliases that only differ in case in ->ntfs_lookup() while maintaining
+ * a case sensitive dcache. This means that we get the full benefit of dcache
+ * speed when the file/directory is looked up with the same case as returned by
+ * ->ntfs_readdir() but that a lookup for any other case (or for the short file
+ * name) will not find anything in dcache and will enter ->ntfs_lookup()
+ * instead, where we search the directory for a fully matching file name
+ * (including case) and if that is not found, we search for a file name that
+ * matches with different case and if that has non-POSIX semantics we return
+ * that. We actually do only one search (case sensitive) and keep tabs on
+ * whether we have found a case insensitive match in the process.
+ *
+ * To simplify matters for us, we do not treat the short vs long filenames as
+ * two hard links but instead if the lookup matches a short filename, we
+ * return the dentry for the corresponding long filename instead.
+ *
+ * There are three cases we need to distinguish here:
+ *
+ * 1) @dent perfectly matches (i.e. including case) a directory entry with a
+ *    file name in the WIN32 or POSIX namespaces. In this case
+ *    ntfs_lookup_inode_by_name() will return with name set to NULL and we
+ *    just d_add() @dent.
+ * 2) @dent matches (not including case) a directory entry with a file name in
+ *    the WIN32 namespace. In this case ntfs_lookup_inode_by_name() will return
+ *    with name set to point to a kmalloc()ed ntfs_name structure containing
+ *    the properly cased little endian Unicode name. We convert the name to the
+ *    current NLS code page, search if a dentry with this name already exists
+ *    and if so return that instead of @dent. The VFS will then destroy the old
+ *    @dent and use the one we returned. If a dentry is not found, we allocate
+ *    a new one, d_add() it, and return it as above.
+ * 3) @dent matches either perfectly or not (i.e. we don't care about case) a
+ *    directory entry with a file name in the DOS namespace. In this case
+ *    ntfs_lookup_inode_by_name() will return with name set to point to a
+ *    kmalloc()ed ntfs_name structure containing the mft reference (cpu endian)
+ *    of the inode. We use the mft reference to read the inode and to find the
+ *    file name in the WIN32 namespace corresponding to the matched short file
+ *    name. We then convert the name to the current NLS code page, and proceed
+ *    searching for a dentry with this name, etc, as in case 2), above.
+ */
+static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent)
+{
+	ntfs_volume *vol = NTFS_SB(dir_ino->i_sb);
+	struct inode *dent_inode;
+	uchar_t *uname;
+	ntfs_name *name = NULL;
+	MFT_REF mref;
+	unsigned long dent_ino;
+	int uname_len;
+
+	ntfs_debug("Looking up %s in directory inode 0x%lx.",
+			dent->d_name.name, dir_ino->i_ino);
+	/* Convert the name of the dentry to Unicode. */
+	uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len,
+			&uname);
+	if (uname_len < 0) {
+		ntfs_error(vol->sb, "Failed to convert name to Unicode.");
+		return ERR_PTR(uname_len);
+	}
+	mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len,
+			&name);
+	kmem_cache_free(ntfs_name_cache, uname);
+	if (!IS_ERR_MREF(mref)) {
+		dent_ino = MREF(mref);
+		ntfs_debug("Found inode 0x%lx. Calling ntfs_iget.", dent_ino);
+		dent_inode = ntfs_iget(vol->sb, dent_ino);
+		if (likely(!IS_ERR(dent_inode))) {
+			/* Consistency check. */
+			if (MSEQNO(mref) == NTFS_I(dent_inode)->seq_no ||
+					dent_ino == FILE_MFT) {
+				/* Perfect WIN32/POSIX match. -- Case 1. */
+				if (!name) {
+					d_add(dent, dent_inode);
+					ntfs_debug("Done.");
+					return NULL;
+				}
+				/*
+				 * We are too indented. Handle imperfect
+				 * matches and short file names further below.
+				 */
+				goto handle_name;
+			}
+			ntfs_error(vol->sb, "Found stale reference to inode "
+					"0x%lx (reference sequence number = "
+					"0x%x, inode sequence number = 0x%x), "
+					"returning -EIO. Run chkdsk.",
+					dent_ino, MSEQNO(mref),
+					NTFS_I(dent_inode)->seq_no);
+			iput(dent_inode);
+			dent_inode = ERR_PTR(-EIO);
+		} else
+			ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with "
+					"error code %li.", dent_ino,
+					PTR_ERR(dent_inode));
+		if (name)
+			kfree(name);
+		/* Return the error code. */
+		return (struct dentry *)dent_inode;
+	}
+	/* It is guaranteed that name is no longer allocated at this point. */
+	if (MREF_ERR(mref) == -ENOENT) {
+		ntfs_debug("Entry was not found, adding negative dentry.");
+		/* The dcache will handle negative entries. */
+		d_add(dent, NULL);
+		ntfs_debug("Done.");
+		return NULL;
+	}
+	ntfs_error(vol->sb, "ntfs_lookup_inode_by_name() failed with error "
+			"code %i.", -MREF_ERR(mref));
+	return ERR_PTR(MREF_ERR(mref));
+
+	// TODO: Consider moving this lot to a separate function! (AIA)
+handle_name:
+   {
+	struct dentry *real_dent;
+	MFT_RECORD *m;
+	attr_search_context *ctx;
+	ntfs_inode *ni = NTFS_I(dent_inode);
+	int err;
+	struct qstr nls_name;
+
+	nls_name.name = NULL;
+	if (name->type != FILE_NAME_DOS) {			/* Case 2. */
+		nls_name.len = (unsigned)ntfs_ucstonls(vol,
+				(uchar_t*)&name->name, name->len,
+				(unsigned char**)&nls_name.name,
+				name->len * 3 + 1);
+		kfree(name);
+	} else /* if (name->type == FILE_NAME_DOS) */ {		/* Case 3. */
+		FILE_NAME_ATTR *fn;
+
+		kfree(name);
+
+		/* Find the WIN32 name corresponding to the matched DOS name. */
+		ni = NTFS_I(dent_inode);
+		m = map_mft_record(ni);
+		if (IS_ERR(m)) {
+			err = PTR_ERR(m);
+			m = NULL;
+			ctx = NULL;
+			goto err_out;
+		}
+		ctx = get_attr_search_ctx(ni, m);
+		if (!ctx) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		do {
+			ATTR_RECORD *a;
+			u32 val_len;
+
+			if (!lookup_attr(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0,
+					ctx)) {
+				ntfs_error(vol->sb, "Inode corrupt: No WIN32 "
+						"namespace counterpart to DOS "
+						"file name. Run chkdsk.");
+				err = -EIO;
+				goto err_out;
+			}
+			/* Consistency checks. */
+			a = ctx->attr;
+			if (a->non_resident || a->flags)
+				goto eio_err_out;
+			val_len = le32_to_cpu(a->data.resident.value_length);
+			if (le16_to_cpu(a->data.resident.value_offset) +
+					val_len > le32_to_cpu(a->length))
+				goto eio_err_out;
+			fn = (FILE_NAME_ATTR*)((u8*)ctx->attr + le16_to_cpu(
+					ctx->attr->data.resident.value_offset));
+			if ((u32)(fn->file_name_length * sizeof(uchar_t) +
+					sizeof(FILE_NAME_ATTR)) > val_len)
+				goto eio_err_out;
+		} while (fn->file_name_type != FILE_NAME_WIN32);
+
+		/* Convert the found WIN32 name to current NLS code page. */
+		nls_name.len = (unsigned)ntfs_ucstonls(vol,
+				(uchar_t*)&fn->file_name, fn->file_name_length,
+				(unsigned char**)&nls_name.name,
+				fn->file_name_length * 3 + 1);
+
+		put_attr_search_ctx(ctx);
+		unmap_mft_record(ni);
+	}
+	m = NULL;
+	ctx = NULL;
+
+	/* Check if a conversion error occurred. */
+	if ((signed)nls_name.len < 0) {
+		err = (signed)nls_name.len;
+		goto err_out;
+	}
+	nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
+
+	/*
+	 * Note: No need for dparent_lock as i_sem is held on the parent inode.
+	 */
+
+	/* Does a dentry matching the nls_name exist already? */
+	real_dent = d_lookup(dent->d_parent, &nls_name);
+	/* If not, create it now. */
+	if (!real_dent) {
+		real_dent = d_alloc(dent->d_parent, &nls_name);
+		kfree(nls_name.name);
+		if (!real_dent) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		d_add(real_dent, dent_inode);
+		return real_dent;
+	}
+	kfree(nls_name.name);
+	/* Matching dentry exists, check if it is negative. */
+	if (real_dent->d_inode) {
+		BUG_ON(real_dent->d_inode != dent_inode);
+		/*
+		 * Already have the inode and the dentry attached, decrement
+		 * the reference count to balance the ntfs_iget() we did
+		 * earlier on.
+		 */
+		iput(dent_inode);
+		return real_dent;
+	}
+	/* Negative dentry: instantiate it. */
+	d_instantiate(real_dent, dent_inode);
+	return real_dent;
+
+eio_err_out:
+	ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
+	err = -EIO;
+err_out:
+	if (ctx)
+		put_attr_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(ni);
+	iput(dent_inode);
+	return ERR_PTR(err);
+   }
+}
+
+/*
+ * Inode operations for directories.
+ */
+struct inode_operations ntfs_dir_inode_ops = {
+	.lookup	= ntfs_lookup,	/* VFS: Lookup directory. */
+};
+
diff -urN linux-2.4.24-vanilla/fs/ntfs/ntfs.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/ntfs.h
--- linux-2.4.24-vanilla/fs/ntfs/ntfs.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/ntfs.h	2004-01-21 14:31:43.000000000 +0000
@@ -0,0 +1,222 @@
+/*
+ * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS
+ *	    project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_H +#define _LINUX_NTFS_H + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "debug.h" +#include "malloc.h" +#include "endian.h" +#include "volume.h" +#include "inode.h" +#include "layout.h" +#include "attrib.h" +#include "mft.h" + +#include "kcompat.h" + +typedef long sector_t; + +typedef enum { + NTFS_BLOCK_SIZE = 512, + NTFS_BLOCK_SIZE_BITS = 9, + NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */ + NTFS_MAX_NAME_LEN = 255, +} NTFS_CONSTANTS; + +/* Global variables. */ + +/* Slab caches (from super.c). */ +extern kmem_cache_t *ntfs_name_cache; +extern kmem_cache_t *ntfs_inode_cache; +extern kmem_cache_t *ntfs_big_inode_cache; +extern kmem_cache_t *ntfs_attr_ctx_cache; + +/* The various operations structs defined throughout the driver files. */ +extern struct super_operations ntfs_sops; +extern struct super_operations ntfs_mount_sops; + +extern struct address_space_operations ntfs_aops; +extern struct address_space_operations ntfs_mft_aops; + +extern struct file_operations ntfs_file_ops; +extern struct inode_operations ntfs_file_inode_ops; + +extern struct file_operations ntfs_dir_ops; +extern struct inode_operations ntfs_dir_inode_ops; + +extern struct file_operations ntfs_empty_file_ops; +extern struct inode_operations ntfs_empty_inode_ops; + +/* Generic macro to convert pointers to values for comparison purposes. */ +#ifndef p2n +#define p2n(p) ((ptrdiff_t)((ptrdiff_t*)(p))) +#endif + +/** + * NTFS_SB - return the ntfs volume given a vfs super block + * @sb: VFS super block + * + * NTFS_SB() returns the ntfs volume associated with the VFS super block @sb. 
+ */ +static inline ntfs_volume *NTFS_SB(struct super_block *sb) +{ + return sb->u.generic_sbp; +} + +/** + * ntfs_unmap_page - release a page that was mapped using ntfs_map_page() + * @page: the page to release + * + * Unpin, unmap and release a page that was obtained from ntfs_map_page(). + */ +static inline void ntfs_unmap_page(struct page *page) +{ + kunmap(page); + page_cache_release(page); +} + +/** + * ntfs_map_page - map a page into accessible memory, reading it if necessary + * @mapping: address space for which to obtain the page + * @index: index into the page cache for @mapping of the page to map + * + * Read a page from the page cache of the address space @mapping at position + * @index, where @index is in units of PAGE_CACHE_SIZE, and not in bytes. + * + * If the page is not in memory it is loaded from disk first using the readpage + * method defined in the address space operations of @mapping and the page is + * added to the page cache of @mapping in the process. + * + * If the page is in high memory it is mapped into memory directly addressable + * by the kernel. + * + * Finally the page count is incremented, thus pinning the page into place. + * + * The above means that page_address(page) can be used on all pages obtained + * with ntfs_map_page() to get the kernel virtual address of the page. + * + * When finished with the page, the caller has to call ntfs_unmap_page() to + * unpin, unmap and release the page. + * + * Note this does not grant exclusive access. If such is desired, the caller + * must provide it independently of the ntfs_{un}map_page() calls by using + * a {rw_}semaphore or other means of serialization. A spin lock cannot be + * used as ntfs_map_page() can block. + * + * The unlocked and uptodate page is returned on success or an encoded error + * on failure. Caller has to test for error using the IS_ERR() macro on the + * return value.
If that evaluates to TRUE, the negative error code can be + * obtained using PTR_ERR() on the return value of ntfs_map_page(). + */ +static inline struct page *ntfs_map_page(struct address_space *mapping, + unsigned long index) +{ + struct page *page = read_cache_page(mapping, index, + (filler_t*)mapping->a_ops->readpage, NULL); + + if (!IS_ERR(page)) { + wait_on_page(page); + kmap(page); + if (PageUptodate(page) && !PageError(page)) + return page; + ntfs_unmap_page(page); + return ERR_PTR(-EIO); + } + return page; +} + +/* Declarations of functions and global variables. */ + +/* From fs/ntfs/compress.c */ +extern int ntfs_read_compressed_block(struct page *page); + +/* From fs/ntfs/super.c */ +#define default_upcase_len 0x10000 +extern wchar_t *default_upcase; +extern unsigned long ntfs_nr_upcase_users; +extern unsigned long ntfs_nr_mounts; +extern struct semaphore ntfs_lock; + +typedef struct { + int val; + char *str; +} option_t; +extern const option_t on_errors_arr[]; + +/* From fs/ntfs/compress.c */ +extern int allocate_compression_buffers(void); +extern void free_compression_buffers(void); + +/* From fs/ntfs/mst.c */ +extern int post_read_mst_fixup(NTFS_RECORD *b, const u32 size); +extern int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size); +extern void post_write_mst_fixup(NTFS_RECORD *b); + +/* From fs/ntfs/time.c */ +extern inline s64 utc2ntfs(const time_t time); +extern inline s64 get_current_ntfs_time(void); +extern inline time_t ntfs2utc(const s64 time); + +/* From fs/ntfs/unistr.c */ +extern BOOL ntfs_are_names_equal(const uchar_t *s1, size_t s1_len, + const uchar_t *s2, size_t s2_len, + const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_size); +extern int ntfs_collate_names(const uchar_t *name1, const u32 name1_len, + const uchar_t *name2, const u32 name2_len, + const int err_val, const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_len); +extern int ntfs_ucsncmp(const uchar_t *s1, const uchar_t *s2, size_t n); 
+extern int ntfs_ucsncasecmp(const uchar_t *s1, const uchar_t *s2, size_t n, + const uchar_t *upcase, const u32 upcase_size); +extern void ntfs_upcase_name(uchar_t *name, u32 name_len, + const uchar_t *upcase, const u32 upcase_len); +extern void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr, + const uchar_t *upcase, const u32 upcase_len); +extern int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1, + FILE_NAME_ATTR *file_name_attr2, + const int err_val, const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_len); +extern int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, + const int ins_len, uchar_t **outs); +extern int ntfs_ucstonls(const ntfs_volume *vol, const uchar_t *ins, + const int ins_len, unsigned char **outs, int outs_len); + +/* From fs/ntfs/upcase.c */ +extern uchar_t *generate_default_upcase(void); + +#endif /* _LINUX_NTFS_H */ + diff -urN linux-2.4.24-vanilla/fs/ntfs/ntfsendian.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/ntfsendian.h --- linux-2.4.24-vanilla/fs/ntfs/ntfsendian.h 2001-07-16 23:14:10.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/ntfsendian.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,60 +0,0 @@ -/* - * ntfsendian.h - * - * Copyright (C) 1998, 1999 Martin von Löwis - * Copyright (C) 1998 Joseph Malicki - * Copyright (C) 1999 Werner Seiler - * Copyright (C) 2001 Anton Altaparmakov (AIA) - */ -#include - -#define CPU_TO_LE16(a) __cpu_to_le16(a) -#define CPU_TO_LE32(a) __cpu_to_le32(a) -#define CPU_TO_LE64(a) __cpu_to_le64(a) - -#define LE16_TO_CPU(a) __cpu_to_le16(a) -#define LE32_TO_CPU(a) __cpu_to_le32(a) -#define LE64_TO_CPU(a) __cpu_to_le64(a) - -#define NTFS_GETU8(p) (*(ntfs_u8*)(p)) -#define NTFS_GETU16(p) ((ntfs_u16)LE16_TO_CPU(*(ntfs_u16*)(p))) -#define NTFS_GETU24(p) ((ntfs_u32)NTFS_GETU16(p) | \ - ((ntfs_u32)NTFS_GETU8(((char*)(p)) + 2) << 16)) -#define NTFS_GETU32(p) ((ntfs_u32)LE32_TO_CPU(*(ntfs_u32*)(p))) -#define NTFS_GETU40(p) ((ntfs_u64)NTFS_GETU32(p) | \ - 
(((ntfs_u64)NTFS_GETU8(((char*)(p)) + 4)) << 32)) -#define NTFS_GETU48(p) ((ntfs_u64)NTFS_GETU32(p) | \ - (((ntfs_u64)NTFS_GETU16(((char*)(p)) + 4)) << 32)) -#define NTFS_GETU56(p) ((ntfs_u64)NTFS_GETU32(p) | \ - (((ntfs_u64)NTFS_GETU24(((char*)(p)) + 4)) << 32)) -#define NTFS_GETU64(p) ((ntfs_u64)LE64_TO_CPU(*(ntfs_u64*)(p))) - - /* Macros writing unsigned integers */ -#define NTFS_PUTU8(p,v) ((*(ntfs_u8*)(p)) = (v)) -#define NTFS_PUTU16(p,v) ((*(ntfs_u16*)(p)) = CPU_TO_LE16(v)) -#define NTFS_PUTU24(p,v) NTFS_PUTU16(p, (v) & 0xFFFF);\ - NTFS_PUTU8(((char*)(p)) + 2, (v) >> 16) -#define NTFS_PUTU32(p,v) ((*(ntfs_u32*)(p)) = CPU_TO_LE32(v)) -#define NTFS_PUTU64(p,v) ((*(ntfs_u64*)(p)) = CPU_TO_LE64(v)) - - /* Macros reading signed integers */ -#define NTFS_GETS8(p) ((*(ntfs_s8*)(p))) -#define NTFS_GETS16(p) ((ntfs_s16)LE16_TO_CPU(*(short*)(p))) -#define NTFS_GETS24(p) (NTFS_GETU24(p) < 0x800000 ? \ - (int)NTFS_GETU24(p) : \ - (int)(NTFS_GETU24(p) - 0x1000000)) -#define NTFS_GETS32(p) ((ntfs_s32)LE32_TO_CPU(*(int*)(p))) -#define NTFS_GETS40(p) (((ntfs_s64)NTFS_GETU32(p)) | \ - (((ntfs_s64)NTFS_GETS8(((char*)(p)) + 4)) << 32)) -#define NTFS_GETS48(p) (((ntfs_s64)NTFS_GETU32(p)) | \ - (((ntfs_s64)NTFS_GETS16(((char*)(p)) + 4)) << 32)) -#define NTFS_GETS56(p) (((ntfs_s64)NTFS_GETU32(p)) | \ - (((ntfs_s64)NTFS_GETS24(((char*)(p)) + 4)) << 32)) -#define NTFS_GETS64(p) ((ntfs_s64)NTFS_GETU64(p)) - -#define NTFS_PUTS8(p,v) NTFS_PUTU8(p,v) -#define NTFS_PUTS16(p,v) NTFS_PUTU16(p,v) -#define NTFS_PUTS24(p,v) NTFS_PUTU24(p,v) -#define NTFS_PUTS32(p,v) NTFS_PUTU32(p,v) -#define NTFS_PUTS64(p,v) NTFS_PUTU64(p,v) - diff -urN linux-2.4.24-vanilla/fs/ntfs/ntfstypes.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/ntfstypes.h --- linux-2.4.24-vanilla/fs/ntfs/ntfstypes.h 2001-07-16 23:14:10.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/ntfstypes.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,84 +0,0 @@ -/* - * ntfstypes.h - This file defines four things: - * - Generic platform independent 
fixed-size types (e.g. ntfs_u32). - * - Specific fixed-size types (e.g. ntfs_offset_t). - * - Macros that read and write those types from and to byte arrays. - * - Types derived from OS specific ones. - * - * Copyright (C) 1996, 1998, 1999 Martin von Löwis - * Copyright (C) 2001 Anton Altaparmakov (AIA) - */ -#include -#include "ntfsendian.h" -#include - -/* Integral types */ -#ifndef NTFS_INTEGRAL_TYPES -#define NTFS_INTEGRAL_TYPES -typedef u8 ntfs_u8; -typedef u16 ntfs_u16; -typedef u32 ntfs_u32; -typedef u64 ntfs_u64; -typedef s8 ntfs_s8; -typedef s16 ntfs_s16; -typedef s32 ntfs_s32; -typedef s64 ntfs_s64; -#endif - -/* Unicode character type */ -#ifndef NTFS_WCHAR_T -#define NTFS_WCHAR_T -typedef u16 ntfs_wchar_t; -#endif -/* File offset */ -#ifndef NTFS_OFFSET_T -#define NTFS_OFFSET_T -typedef s64 ntfs_offset_t; -#endif -/* UTC */ -#ifndef NTFS_TIME64_T -#define NTFS_TIME64_T -typedef u64 ntfs_time64_t; -#endif -/* - * This is really signed long long. So we support only volumes up to 2Tb. This - * is ok as Win2k also only uses 32-bits to store clusters. - * Whatever you do keep this a SIGNED value or a lot of NTFS users with - * corrupted filesystems will lynch you! It causes massive fs corruption when - * unsigned due to the nature of many checks relying on being performed on - * signed quantities. (AIA) - */ -#ifndef NTFS_CLUSTER_T -#define NTFS_CLUSTER_T -typedef s32 ntfs_cluster_t; -#endif - -/* Architecture independent macros. */ - -/* PUTU32 would not clear all bytes. */ -#define NTFS_PUTINUM(p,i) NTFS_PUTU64(p, i->i_number); \ - NTFS_PUTU16(((char*)p) + 6, i->sequence_number) - -/* System dependent types. 
*/ -#include -#ifndef NTMODE_T -#define NTMODE_T -typedef __kernel_mode_t ntmode_t; -#endif -#ifndef NTFS_UID_T -#define NTFS_UID_T -typedef uid_t ntfs_uid_t; -#endif -#ifndef NTFS_GID_T -#define NTFS_GID_T -typedef gid_t ntfs_gid_t; -#endif -#ifndef NTFS_SIZE_T -#define NTFS_SIZE_T -typedef __kernel_size_t ntfs_size_t; -#endif -#ifndef NTFS_TIME_T -#define NTFS_TIME_T -typedef __kernel_time_t ntfs_time_t; -#endif - diff -urN linux-2.4.24-vanilla/fs/ntfs/struct.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/struct.h --- linux-2.4.24-vanilla/fs/ntfs/struct.h 2001-09-08 20:24:40.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/struct.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,69 +0,0 @@ -/* - * struct.h - Structure definitions - * - * Copyright (C) 1997 Régis Duchesne - * Copyright (C) 2000-2001 Anton Altaparmakov (AIA) - */ -#include - -/* Necessary forward definition. */ -struct ntfs_inode; - -/* Which files should be returned from a director listing. */ -#define ngt_dos 1 /* only short names, no system files */ -#define ngt_nt 2 /* only long names, all-uppercase becomes - * all-lowercase, no system files */ -#define ngt_posix 3 /* all names except system files */ -#define ngt_full 4 /* all entries */ - -typedef struct ntfs_sb_info ntfs_volume; - -typedef struct { - ntfs_cluster_t lcn; - ntfs_cluster_t len; -} ntfs_runlist; - -typedef struct ntfs_attribute { - int type; - ntfs_u16 *name; - int namelen; - int attrno; - __s64 size, allocated, initialized, compsize; - ATTR_FLAGS flags; - __u8 resident, indexed; - int cengine; - union { - void *data; /* if resident */ - struct { - ntfs_runlist *runlist; - unsigned long len; - } r; - } d; -} ntfs_attribute; - -typedef struct ntfs_inode_info ntfs_inode; - -/* Structure to define IO to user buffer. do_read means that the destination - * has to be written using fn_put, do_write means that the destination has to - * read using fn_get. 
So, do_read is from a user's point of view, while put and - * get are from the driver's point of view. The first argument is always the - * destination of the IO. */ -typedef struct ntfs_io{ - int do_read; - void (*fn_put)(struct ntfs_io *dest, void *buf, ntfs_size_t); - void (*fn_get)(void *buf, struct ntfs_io *src, ntfs_size_t len); - void *param; - unsigned long size; -} ntfs_io; - -#if 0 -typedef struct { - ntfs_volume *vol; - ntfs_inode *ino; - int type; - char *name; - int mftno; - int start_vcn; -} ntfs_attrlist_item; -#endif - diff -urN linux-2.4.24-vanilla/fs/ntfs/super.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/super.c --- linux-2.4.24-vanilla/fs/ntfs/super.c 2003-11-28 18:26:21.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/super.c 2004-01-21 14:28:25.000000000 +0000 @@ -1,1416 +1,1773 @@ /* - * super.c + * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. * - * Copyright (C) 1995-1997, 1999 Martin von Löwis - * Copyright (C) 1996-1997 Régis Duchesne - * Copyright (C) 1999 Steve Dodd - * Copyright (C) 2000-2001 Anton Altparmakov (AIA) + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2001,2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include -#include -#include -#include -#include "ntfstypes.h" -#include "struct.h" -#include "super.h" -#include "macros.h" -#include "inode.h" -#include "support.h" -#include "util.h" -#include - -/* All important structures in NTFS use 2 consistency checks: - * . a magic structure identifier (FILE, INDX, RSTR, RCRD...) - * . a fixup technique : the last word of each sector (called a fixup) of a - * structure's record should end with the word at offset of the first - * sector, and if it is the case, must be replaced with the words following - * . The value of and the number of fixups is taken from the fields - * at the offsets 4 and 6. Note that the sector size is defined as - * NTFS_SECTOR_SIZE and not as the hardware sector size (this is concordant - * with what the Windows NTFS driver does). - * - * This function performs these 2 checks, and _fails_ if: - * . the input size is invalid - * . the fixup header is invalid - * . the size does not match the number of sectors - * . the magic identifier is wrong - * . a fixup is invalid +#include +#include +#include +#include +#include +#include /* For gendisk stuff. */ +#include /* For get_hardsect_size. */ + +#include "ntfs.h" +#include "sysctl.h" + +/* Number of mounted file systems which have compression enabled. */ +static unsigned long ntfs_nr_compression_users = 0; + +/* Error constants/strings used in inode.c::ntfs_show_options(). */ +typedef enum { + /* One of these must be present, default is ON_ERRORS_CONTINUE. */ + ON_ERRORS_PANIC = 0x01, + ON_ERRORS_REMOUNT_RO = 0x02, + ON_ERRORS_CONTINUE = 0x04, + /* Optional, can be combined with any of the above.
*/ + ON_ERRORS_RECOVER = 0x10, +} ON_ERRORS_ACTIONS; + +const option_t on_errors_arr[] = { + { ON_ERRORS_PANIC, "panic" }, + { ON_ERRORS_REMOUNT_RO, "remount-ro", }, + { ON_ERRORS_CONTINUE, "continue", }, + { ON_ERRORS_RECOVER, "recover" }, + { 0, NULL } +}; + +/** + * simple_getbool - parse a boolean option argument (NULL counts as true) + * + * Copied from old ntfs driver (which copied from vfat driver). */ -int ntfs_fixup_record(char *record, char *magic, int size) +static int simple_getbool(char *s, BOOL *setval) { - int start, count, offset; - ntfs_u16 fixup; - - if (!IS_MAGIC(record, magic)) - return 0; - start = NTFS_GETU16(record + 4); - count = NTFS_GETU16(record + 6) - 1; - if (size & (NTFS_SECTOR_SIZE - 1) || start & 1 || - start + count * 2 > size || size >> 9 != count) { - if (size <= 0) - printk(KERN_ERR "NTFS: BUG: ntfs_fixup_record() got " - "zero size! Please report this to " - "linux-ntfs-dev@lists.sf.net\n"); - return 0; - } - fixup = NTFS_GETU16(record + start); - start += 2; - offset = NTFS_SECTOR_SIZE - 2; - while (count--) { - if (NTFS_GETU16(record + offset) != fixup) + if (s) { + if (!strcmp(s, "1") || !strcmp(s, "yes") || !strcmp(s, "true")) + *setval = TRUE; + else if (!strcmp(s, "0") || !strcmp(s, "no") || + !strcmp(s, "false")) + *setval = FALSE; + else return 0; - NTFS_PUTU16(record + offset, NTFS_GETU16(record + start)); - start += 2; - offset += NTFS_SECTOR_SIZE; - } + } else + *setval = TRUE; return 1; } -/* - * Get vital informations about the ntfs partition from the boot sector. - * Return 0 on success or -1 on error. +/** + * parse_options - parse the (re)mount options + * @vol: ntfs volume + * @opt: string containing the (re)mount options + * + * Parse the recognized options in @opt for the ntfs volume described by @vol.
+ */ +static BOOL parse_options(ntfs_volume *vol, char *opt) +{ + char *p, *v, *ov; + static char *utf8 = "utf8"; + int errors = 0, sloppy = 0; + uid_t uid = (uid_t)-1; + gid_t gid = (gid_t)-1; + mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; + int mft_zone_multiplier = -1, on_errors = -1; + int show_sys_files = -1, case_sensitive = -1; + struct nls_table *nls_map = NULL, *old_nls; + + /* I am lazy... (-8 */ +#define NTFS_GETOPT_WITH_DEFAULT(option, variable, default_value) \ + if (!strcmp(p, option)) { \ + if (!v || !*v) \ + variable = default_value; \ + else { \ + variable = simple_strtoul(ov = v, &v, 0); \ + if (*v) \ + goto needs_val; \ + } \ + } +#define NTFS_GETOPT(option, variable) \ + if (!strcmp(p, option)) { \ + if (!v || !*v) \ + goto needs_arg; \ + variable = simple_strtoul(ov = v, &v, 0); \ + if (*v) \ + goto needs_val; \ + } +#define NTFS_GETOPT_BOOL(option, variable) \ + if (!strcmp(p, option)) { \ + BOOL val; \ + if (!simple_getbool(v, &val)) \ + goto needs_bool; \ + variable = val; \ + } +#define NTFS_GETOPT_OPTIONS_ARRAY(option, variable, opt_array) \ + if (!strcmp(p, option)) { \ + int _i; \ + if (!v || !*v) \ + goto needs_arg; \ + ov = v; \ + if (variable == -1) \ + variable = 0; \ + for (_i = 0; opt_array[_i].str && *opt_array[_i].str; _i++) \ + if (!strcmp(opt_array[_i].str, v)) { \ + variable |= opt_array[_i].val; \ + break; \ + } \ + if (!opt_array[_i].str || !*opt_array[_i].str) \ + goto needs_val; \ + } + if (!opt || !*opt) + goto no_mount_options; + ntfs_debug("Entering with mount options string: %s", opt); + while ((p = strsep(&opt, ","))) { + if ((v = strchr(p, '='))) + *v++ = '\0'; + NTFS_GETOPT("uid", uid) + else NTFS_GETOPT("gid", gid) + else NTFS_GETOPT("umask", fmask = dmask) + else NTFS_GETOPT("fmask", fmask) + else NTFS_GETOPT("dmask", dmask) + else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier) + else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) + else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) + else 
NTFS_GETOPT_BOOL("case_sensitive", case_sensitive) + else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors, + on_errors_arr) + else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes")) + ntfs_warning(vol->sb, "Ignoring obsolete option %s.", + p); + else if (!strcmp(p, "nls") || !strcmp(p, "iocharset")) { + if (!strcmp(p, "iocharset")) + ntfs_warning(vol->sb, "Option iocharset is " + "deprecated. Please use " + "option nls= in " + "the future."); + if (!v || !*v) + goto needs_arg; +use_utf8: + old_nls = nls_map; + nls_map = load_nls(v); + if (!nls_map) { + if (!old_nls) { + ntfs_error(vol->sb, "NLS character set " + "%s not found.", v); + return FALSE; + } + ntfs_error(vol->sb, "NLS character set %s not " + "found. Using previous one %s.", + v, old_nls->charset); + nls_map = old_nls; + } else /* nls_map */ { + if (old_nls) + unload_nls(old_nls); + } + } else if (!strcmp(p, "utf8")) { + BOOL val = FALSE; + ntfs_warning(vol->sb, "Option utf8 is no longer " + "supported, using option nls=utf8. Please " + "use option nls=utf8 in the future and " + "make sure utf8 is compiled either as a " + "module or into the kernel."); + if (!v || !*v) + val = TRUE; + else if (!simple_getbool(v, &val)) + goto needs_bool; + if (val) { + v = utf8; + goto use_utf8; + } + } else { + ntfs_error(vol->sb, "Unrecognized mount option %s.", p); + if (errors < INT_MAX) + errors++; + } +#undef NTFS_GETOPT_OPTIONS_ARRAY +#undef NTFS_GETOPT_BOOL +#undef NTFS_GETOPT +#undef NTFS_GETOPT_WITH_DEFAULT + } +no_mount_options: + if (errors && !sloppy) + return FALSE; + if (sloppy) + ntfs_warning(vol->sb, "Sloppy option given. Ignoring " + "unrecognized mount option(s) and continuing."); + /* Keep this first! 
*/ + if (on_errors != -1) { + if (!on_errors) { + ntfs_error(vol->sb, "Invalid errors option argument " + "or bug in options parser."); + return FALSE; + } + } + if (nls_map) { + if (vol->nls_map && vol->nls_map != nls_map) { + ntfs_error(vol->sb, "Cannot change NLS character set " + "on remount."); + return FALSE; + } /* else (!vol->nls_map) */ + ntfs_debug("Using NLS character set %s.", nls_map->charset); + vol->nls_map = nls_map; + } else /* (!nls_map) */ { + if (!vol->nls_map) { + vol->nls_map = load_nls_default(); + if (!vol->nls_map) { + ntfs_error(vol->sb, "Failed to load default " + "NLS character set."); + return FALSE; + } + ntfs_debug("Using default NLS character set (%s).", + vol->nls_map->charset); + } + } + if (mft_zone_multiplier != -1) { + if (vol->mft_zone_multiplier && vol->mft_zone_multiplier != + mft_zone_multiplier) { + ntfs_error(vol->sb, "Cannot change mft_zone_multiplier " + "on remount."); + return FALSE; + } + if (mft_zone_multiplier < 1 || mft_zone_multiplier > 4) { + ntfs_error(vol->sb, "Invalid mft_zone_multiplier. " + "Using default value, i.e. 
1."); + mft_zone_multiplier = 1; + } + vol->mft_zone_multiplier = mft_zone_multiplier; + } + if (!vol->mft_zone_multiplier) + vol->mft_zone_multiplier = 1; + if (on_errors != -1) + vol->on_errors = on_errors; + if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER) + vol->on_errors |= ON_ERRORS_CONTINUE; + if (uid != (uid_t)-1) + vol->uid = uid; + if (gid != (gid_t)-1) + vol->gid = gid; + if (fmask != (mode_t)-1) + vol->fmask = fmask; + if (dmask != (mode_t)-1) + vol->dmask = dmask; + if (show_sys_files != -1) { + if (show_sys_files) + NVolSetShowSystemFiles(vol); + else + NVolClearShowSystemFiles(vol); + } + if (case_sensitive != -1) { + if (case_sensitive) + NVolSetCaseSensitive(vol); + else + NVolClearCaseSensitive(vol); + } + return TRUE; +needs_arg: + ntfs_error(vol->sb, "The %s option requires an argument.", p); + return FALSE; +needs_bool: + ntfs_error(vol->sb, "The %s option requires a boolean argument.", p); + return FALSE; +needs_val: + ntfs_error(vol->sb, "Invalid %s option argument: %s", p, ov); + return FALSE; +} + +/** + * ntfs_remount - change the mount options of a mounted ntfs filesystem + * @sb: superblock of mounted ntfs filesystem + * @flags: remount flags + * @opt: remount options string + * + * Change the mount options of an already mounted ntfs filesystem. + * + * NOTE: The VFS set the @sb->s_flags remount flags to @flags after + * ntfs_remount() returns successfully (i.e. returns 0). Otherwise, + * @sb->s_flags are not changed. + */ +static int ntfs_remount(struct super_block *sb, int *flags, char *opt) +{ + ntfs_volume *vol = NTFS_SB(sb); + + ntfs_debug("Entering with remount options string: %s", opt); + +#ifndef NTFS_RW + /* For read-only compiled driver, enforce all read-only flags. */ + *flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; +#else + /* + * For the read-write compiled driver, if we are remounting read-write, + * make sure there aren't any volume errors. 
+ */ + if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + if (NVolErrors(vol)) { + ntfs_error(sb, "Volume has errors and is read-only." + "Cannot remount read-write."); + return -EROFS; + } + } +#endif + + // FIXME/TODO: If left like this we will have problems with rw->ro and + // ro->rw, as well as with sync->async and vice versa remounts. + // Note: The VFS already checks that there are no pending deletes and + // no open files for writing. So we only need to worry about dirty + // inode pages and dirty system files (which include dirty inodes). + // Either handle by flushing the whole volume NOW or by having the + // write routines work on MS_RDONLY fs and guarantee we don't mark + // anything as dirty if MS_RDONLY is set. That way the dirty data + // would get flushed but no new dirty data would appear. This is + // probably best but we need to be careful not to mark anything dirty + // or the MS_RDONLY will be leaking writes. + + // TODO: Deal with *flags. + + if (!parse_options(vol, opt)) + return -EINVAL; + + return 0; +} + +/** + * is_boot_sector_ntfs - check whether a boot sector is a valid NTFS boot sector + * @sb: Super block of the device to which @b belongs. + * @b: Boot sector of device @sb to check. + * @silent: If TRUE, all output will be silenced. + * + * is_boot_sector_ntfs() checks whether the boot sector @b is a valid NTFS boot + * sector. Returns TRUE if it is valid and FALSE if not. + * + * @sb is only needed for warning/error output, i.e. it can be NULL when silent + * is TRUE. + */ +static BOOL is_boot_sector_ntfs(const struct super_block *sb, + const NTFS_BOOT_SECTOR *b, const BOOL silent) +{ + /* + * Check that checksum == sum of u32 values from b to the checksum + * field. If checksum is zero, no checking is done. 
+ */ + if ((void*)b < (void*)&b->checksum && b->checksum) { + u32 i, *u; + for (i = 0, u = (u32*)b; u < (u32*)(&b->checksum); ++u) + i += le32_to_cpup(u); + if (le32_to_cpu(b->checksum) != i) + goto not_ntfs; + } + /* Check OEMidentifier is "NTFS " */ + if (b->oem_id != magicNTFS) + goto not_ntfs; + /* Check bytes per sector value is between 256 and 4096. */ + if (le16_to_cpu(b->bpb.bytes_per_sector) < 0x100 || + le16_to_cpu(b->bpb.bytes_per_sector) > 0x1000) + goto not_ntfs; + /* Check sectors per cluster value is valid. */ + switch (b->bpb.sectors_per_cluster) { + case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128: + break; + default: + goto not_ntfs; + } + /* Check the cluster size is not above 65536 bytes. */ + if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) * + b->bpb.sectors_per_cluster > 0x10000) + goto not_ntfs; + /* Check reserved/unused fields are really zero. */ + if (le16_to_cpu(b->bpb.reserved_sectors) || + le16_to_cpu(b->bpb.root_entries) || + le16_to_cpu(b->bpb.sectors) || + le16_to_cpu(b->bpb.sectors_per_fat) || + le32_to_cpu(b->bpb.large_sectors) || b->bpb.fats) + goto not_ntfs; + /* Check clusters per file mft record value is valid. */ + if ((u8)b->clusters_per_mft_record < 0xe1 || + (u8)b->clusters_per_mft_record > 0xf7) + switch (b->clusters_per_mft_record) { + case 1: case 2: case 4: case 8: case 16: case 32: case 64: + break; + default: + goto not_ntfs; + } + /* Check clusters per index block value is valid. */ + if ((u8)b->clusters_per_index_record < 0xe1 || + (u8)b->clusters_per_index_record > 0xf7) + switch (b->clusters_per_index_record) { + case 1: case 2: case 4: case 8: case 16: case 32: case 64: + break; + default: + goto not_ntfs; + } + /* + * Check for valid end of sector marker. We will work without it, but + * many BIOSes will refuse to boot from a bootsector if the magic is + * incorrect, so we emit a warning. 
+ */ + if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55)) + ntfs_warning(sb, "Invalid end of sector marker."); + return TRUE; +not_ntfs: + return FALSE; +} + +/** + * read_ntfs_boot_sector - read the NTFS boot sector of a device + * @sb: super block of device to read the boot sector from + * @silent: if true, suppress all output + * + * Reads the boot sector from the device and validates it. If that fails, tries + * to read the backup boot sector, first from the end of the device a-la NT4 and + * later and then from the middle of the device a-la NT3.51 and before. + * + * If a valid boot sector is found but it is not the primary boot sector, we + * repair the primary boot sector silently (unless the device is read-only or + * the primary boot sector is not accessible). + * + * NOTE: To call this function, @sb must have the fields s_dev, the ntfs super + * block (u.ntfs_sb), nr_blocks and the device flags (s_flags) initialized + * to their respective values. + * + * Return the unlocked buffer head containing the boot sector or NULL on error. */ -int ntfs_init_volume(ntfs_volume *vol, char *boot) +static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb, + const int silent) { - int sectors_per_cluster_bits; - __s64 ll; - ntfs_cluster_t mft_zone_size, tc; - - /* System defined default values, in case we don't load $AttrDef. */ - vol->at_standard_information = 0x10; - vol->at_attribute_list = 0x20; - vol->at_file_name = 0x30; - vol->at_volume_version = 0x40; - vol->at_security_descriptor = 0x50; - vol->at_volume_name = 0x60; - vol->at_volume_information = 0x70; - vol->at_data = 0x80; - vol->at_index_root = 0x90; - vol->at_index_allocation = 0xA0; - vol->at_bitmap = 0xB0; - vol->at_symlink = 0xC0; - /* Sector size. 
*/ - vol->sector_size = NTFS_GETU16(boot + 0xB); - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->sector_size = 0x%x\n", - vol->sector_size); - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: sectors_per_cluster = " - "0x%x\n", NTFS_GETU8(boot + 0xD)); - sectors_per_cluster_bits = ffs(NTFS_GETU8(boot + 0xD)) - 1; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: sectors_per_cluster_bits " - "= 0x%x\n", sectors_per_cluster_bits); - vol->mft_clusters_per_record = NTFS_GETS8(boot + 0x40); - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_clusters_per_record" - " = 0x%x\n", vol->mft_clusters_per_record); - vol->index_clusters_per_record = NTFS_GETS8(boot + 0x44); - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: " - "vol->index_clusters_per_record = 0x%x\n", - vol->index_clusters_per_record); + const char *read_err_str = "Unable to read %s boot sector."; + struct buffer_head *bh_primary, *bh_backup; + long nr_blocks = NTFS_SB(sb)->nr_blocks; + + /* Try to read primary boot sector. */ + if ((bh_primary = sb_bread(sb, 0))) { + if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) + bh_primary->b_data, silent)) + return bh_primary; + if (!silent) + ntfs_error(sb, "Primary boot sector is invalid."); + } else if (!silent) + ntfs_error(sb, read_err_str, "primary"); + if (!(NTFS_SB(sb)->on_errors & ON_ERRORS_RECOVER)) { + if (bh_primary) + brelse(bh_primary); + if (!silent) + ntfs_error(sb, "Mount option errors=recover not used. " + "Aborting without trying to recover."); + return NULL; + } + /* Try to read NT4+ backup boot sector. */ + if ((bh_backup = sb_bread(sb, nr_blocks - 1))) { + if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) + bh_backup->b_data, silent)) + goto hotfix_primary_boot_sector; + brelse(bh_backup); + } else if (!silent) + ntfs_error(sb, read_err_str, "backup"); + /* Try to read NT3.51- backup boot sector. 
*/ + if ((bh_backup = sb_bread(sb, nr_blocks >> 1))) { + if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) + bh_backup->b_data, silent)) + goto hotfix_primary_boot_sector; + if (!silent) + ntfs_error(sb, "Could not find a valid backup boot " + "sector."); + brelse(bh_backup); + } else if (!silent) + ntfs_error(sb, read_err_str, "backup"); + /* We failed. Cleanup and return. */ + if (bh_primary) + brelse(bh_primary); + return NULL; +hotfix_primary_boot_sector: + if (bh_primary) { + /* + * If we managed to read sector zero and the volume is not + * read-only, copy the found, valid backup boot sector to the + * primary boot sector. + */ + if (!(sb->s_flags & MS_RDONLY)) { + ntfs_warning(sb, "Hot-fix: Recovering invalid primary " + "boot sector from backup copy."); + memcpy(bh_primary->b_data, bh_backup->b_data, + sb->s_blocksize); + mark_buffer_dirty(bh_primary); + ll_rw_block(WRITE, 1, &bh_primary); + wait_on_buffer(bh_primary); + if (buffer_uptodate(bh_primary)) { + brelse(bh_backup); + return bh_primary; + } + ntfs_error(sb, "Hot-fix: Device write error while " + "recovering primary boot sector."); + } else { + ntfs_warning(sb, "Hot-fix: Recovery of primary boot " + "sector failed: Read-only mount."); + } + brelse(bh_primary); + } + ntfs_warning(sb, "Using backup boot sector."); + return bh_backup; +} + +/** + * parse_ntfs_boot_sector - parse the boot sector and store the data in @vol + * @vol: volume structure to initialise with data from boot sector + * @b: boot sector to parse + * + * Parse the ntfs boot sector @b and store all important information therein in + * the ntfs super block @vol. Return TRUE on success and FALSE on error. 
+ */ +static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) +{ + unsigned int sectors_per_cluster_bits, nr_hidden_sects; + int clusters_per_mft_record, clusters_per_index_record; + s64 ll; + + vol->sector_size = le16_to_cpu(b->bpb.bytes_per_sector); + vol->sector_size_bits = ffs(vol->sector_size) - 1; + ntfs_debug("vol->sector_size = %i (0x%x)", vol->sector_size, + vol->sector_size); + ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits, + vol->sector_size_bits); + if (vol->sector_size != vol->sb->s_blocksize) + ntfs_warning(vol->sb, "The boot sector indicates a sector size " + "different from the device sector size."); + ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster); + sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1; + ntfs_debug("sectors_per_cluster_bits = 0x%x", + sectors_per_cluster_bits); + nr_hidden_sects = le32_to_cpu(b->bpb.hidden_sectors); + ntfs_debug("number of hidden sectors = 0x%x", nr_hidden_sects); vol->cluster_size = vol->sector_size << sectors_per_cluster_bits; + vol->cluster_size_mask = vol->cluster_size - 1; vol->cluster_size_bits = ffs(vol->cluster_size) - 1; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->cluster_size = 0x%x\n", - vol->cluster_size); - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->cluster_size_bits = " - "0x%x\n", vol->cluster_size_bits); - if (vol->mft_clusters_per_record > 0) + ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size, + vol->cluster_size); + ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask); + ntfs_debug("vol->cluster_size_bits = %i (0x%x)", + vol->cluster_size_bits, vol->cluster_size_bits); + if (vol->sector_size > vol->cluster_size) { + ntfs_error(vol->sb, "Sector sizes above the cluster size are " + "not supported. Sorry."); + return FALSE; + } + if (vol->sb->s_blocksize > vol->cluster_size) { + ntfs_error(vol->sb, "Cluster sizes smaller than the device " + "sector size are not supported. 
Sorry."); + return FALSE; + } + clusters_per_mft_record = b->clusters_per_mft_record; + ntfs_debug("clusters_per_mft_record = %i (0x%x)", + clusters_per_mft_record, clusters_per_mft_record); + if (clusters_per_mft_record > 0) vol->mft_record_size = vol->cluster_size << - (ffs(vol->mft_clusters_per_record) - 1); + (ffs(clusters_per_mft_record) - 1); else /* - * When mft_record_size < cluster_size, mft_clusters_per_record + * When mft_record_size < cluster_size, clusters_per_mft_record * = -log2(mft_record_size) bytes. mft_record_size normaly is * 1024 bytes, which is encoded as 0xF6 (-10 in decimal). */ - vol->mft_record_size = 1 << -vol->mft_clusters_per_record; + vol->mft_record_size = 1 << -clusters_per_mft_record; + vol->mft_record_size_mask = vol->mft_record_size - 1; vol->mft_record_size_bits = ffs(vol->mft_record_size) - 1; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_record_size = 0x%x" - "\n", vol->mft_record_size); - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_record_size_bits = " - "0x%x\n", vol->mft_record_size_bits); - if (vol->index_clusters_per_record > 0) + ntfs_debug("vol->mft_record_size = %i (0x%x)", vol->mft_record_size, + vol->mft_record_size); + ntfs_debug("vol->mft_record_size_mask = 0x%x", + vol->mft_record_size_mask); + ntfs_debug("vol->mft_record_size_bits = %i (0x%x)", + vol->mft_record_size_bits, vol->mft_record_size_bits); + clusters_per_index_record = b->clusters_per_index_record; + ntfs_debug("clusters_per_index_record = %i (0x%x)", + clusters_per_index_record, clusters_per_index_record); + if (clusters_per_index_record > 0) vol->index_record_size = vol->cluster_size << - (ffs(vol->index_clusters_per_record) - 1); + (ffs(clusters_per_index_record) - 1); else /* * When index_record_size < cluster_size, - * index_clusters_per_record = -log2(index_record_size) bytes. + * clusters_per_index_record = -log2(index_record_size) bytes. * index_record_size normaly equals 4096 bytes, which is * encoded as 0xF4 (-12 in decimal). 
*/ - vol->index_record_size = 1 << -vol->index_clusters_per_record; + vol->index_record_size = 1 << -clusters_per_index_record; + vol->index_record_size_mask = vol->index_record_size - 1; vol->index_record_size_bits = ffs(vol->index_record_size) - 1; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->index_record_size = " - "0x%x\n", vol->index_record_size); - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->index_record_size_bits " - "= 0x%x\n", vol->index_record_size_bits); - /* - * Get the size of the volume in clusters (ofs 0x28 is nr_sectors) and - * check for 64-bit-ness. Windows currently only uses 32 bits to save - * the clusters so we do the same as it is much faster on 32-bit CPUs. - */ - ll = NTFS_GETS64(boot + 0x28) >> sectors_per_cluster_bits; - if (ll >= (__s64)1 << 31) { - ntfs_error("Cannot handle 64-bit clusters. Please inform " - "linux-ntfs-dev@lists.sf.net that you got this " - "error.\n"); - return -1; - } - vol->nr_clusters = (ntfs_cluster_t)ll; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->nr_clusters = 0x%x\n", - vol->nr_clusters); - vol->mft_lcn = (ntfs_cluster_t)NTFS_GETS64(boot + 0x30); - vol->mft_mirr_lcn = (ntfs_cluster_t)NTFS_GETS64(boot + 0x38); - /* Determine MFT zone size. */ - mft_zone_size = vol->nr_clusters; + ntfs_debug("vol->index_record_size = %i (0x%x)", + vol->index_record_size, vol->index_record_size); + ntfs_debug("vol->index_record_size_mask = 0x%x", + vol->index_record_size_mask); + ntfs_debug("vol->index_record_size_bits = %i (0x%x)", + vol->index_record_size_bits, + vol->index_record_size_bits); + /* + * Get the size of the volume in clusters and check for 64-bit-ness. + * Windows currently only uses 32 bits to save the clusters so we do + * the same as it is much faster on 32-bit CPUs. + */ + ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_bits; + if ((u64)ll >= 1ULL << 32) { + ntfs_error(vol->sb, "Cannot handle 64-bit clusters. 
Sorry."); + return FALSE; + } + vol->nr_clusters = ll; + ntfs_debug("vol->nr_clusters = 0x%Lx", (long long)vol->nr_clusters); + /* + * On an architecture where unsigned long is 32-bits, we restrict the + * volume size to 2TiB (2^41). On a 64-bit architecture, the compiler + * will hopefully optimize the whole check away. + */ + if (sizeof(unsigned long) < 8) { + if ((ll << vol->cluster_size_bits) >= (1ULL << 41)) { + ntfs_error(vol->sb, "Volume size (%LuTiB) is too large " + "for this architecture. Maximim " + "supported is 2TiB. Sorry.", + ll >> (40 - vol->cluster_size_bits)); + return FALSE; + } + } + ll = sle64_to_cpu(b->mft_lcn); + if (ll >= vol->nr_clusters) { + ntfs_error(vol->sb, "MFT LCN is beyond end of volume. Weird."); + return FALSE; + } + vol->mft_lcn = ll; + ntfs_debug("vol->mft_lcn = 0x%Lx", (long long)vol->mft_lcn); + ll = sle64_to_cpu(b->mftmirr_lcn); + if (ll >= vol->nr_clusters) { + ntfs_error(vol->sb, "MFTMirr LCN is beyond end of volume. " + "Weird."); + return FALSE; + } + vol->mftmirr_lcn = ll; + ntfs_debug("vol->mftmirr_lcn = 0x%Lx", (long long)vol->mftmirr_lcn); + vol->serial_no = le64_to_cpu(b->volume_serial_number); + ntfs_debug("vol->serial_no = 0x%Lx", + (unsigned long long)vol->serial_no); + /* + * Determine MFT zone size. This is not strictly the right place to do + * this, but I am too lazy to create a function especially for it... + */ + vol->mft_zone_end = vol->nr_clusters; switch (vol->mft_zone_multiplier) { /* % of volume size in clusters */ case 4: - mft_zone_size >>= 1; /* 50% */ + vol->mft_zone_end = vol->mft_zone_end >> 1; /* 50% */ break; case 3: - mft_zone_size = mft_zone_size * 3 >> 3; /* 37.5% */ + vol->mft_zone_end = (vol->mft_zone_end + + (vol->mft_zone_end >> 1)) >> 2; /* 37.5% */ break; case 2: - mft_zone_size >>= 2; /* 25% */ + vol->mft_zone_end = vol->mft_zone_end >> 2; /* 25% */ break; - /* case 1: */ default: - mft_zone_size >>= 3; /* 12.5% */ + vol->mft_zone_multiplier = 1; + /* Fall through into case 1. 
*/ + case 1: + vol->mft_zone_end = vol->mft_zone_end >> 3; /* 12.5% */ break; } - /* Setup mft zone. */ - vol->mft_zone_start = vol->mft_zone_pos = vol->mft_lcn; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_zone_pos = %x\n", - vol->mft_zone_pos); - /* - * Calculate the mft_lcn for an unmodified NTFS volume (see mkntfs - * source) and if the actual mft_lcn is in the expected place or even - * further to the front of the volume, extend the mft_zone to cover the - * beginning of the volume as well. This is in order to protect the - * area reserved for the mft bitmap as well within the mft_zone itself. - * On non-standard volumes we don't protect it as well as the overhead - * would be higher than the speed increase we would get by doing it. - */ - tc = (8192 + 2 * vol->cluster_size - 1) / vol->cluster_size; - if (tc * vol->cluster_size < 16 * 1024) - tc = (16 * 1024 + vol->cluster_size - 1) / vol->cluster_size; - if (vol->mft_zone_start <= tc) - vol->mft_zone_start = (ntfs_cluster_t)0; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_zone_start = %x\n", - vol->mft_zone_start); - /* - * Need to cap the mft zone on non-standard volumes so that it does - * not point outside the boundaries of the volume, we do this by - * halving the zone size until we are inside the volume. - */ - vol->mft_zone_end = vol->mft_lcn + mft_zone_size; - while (vol->mft_zone_end >= vol->nr_clusters) { - mft_zone_size >>= 1; - vol->mft_zone_end = vol->mft_lcn + mft_zone_size; - } - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->mft_zone_end = %x\n", - vol->mft_zone_end); - /* - * Set the current position within each data zone to the start of the - * respective zone. 
- */ - vol->data1_zone_pos = vol->mft_zone_end; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->data1_zone_pos = %x\n", - vol->data1_zone_pos); - vol->data2_zone_pos = (ntfs_cluster_t)0; - ntfs_debug(DEBUG_FILE3, "ntfs_init_volume: vol->data2_zone_pos = %x\n", - vol->data2_zone_pos); - /* Set the mft data allocation position to mft record 24. */ - vol->mft_data_pos = 24UL; - /* This will be initialized later. */ - vol->upcase = 0; - vol->upcase_length = 0; - vol->mft_ino = 0; - return 0; -} - -static void ntfs_init_upcase(ntfs_inode *upcase) -{ - ntfs_io io; -#define UPCASE_LENGTH 256 - upcase->vol->upcase = ntfs_malloc(UPCASE_LENGTH << 1); - if (!upcase->vol->upcase) - return; - io.fn_put = ntfs_put; - io.fn_get = 0; - io.param = (char*)upcase->vol->upcase; - io.size = UPCASE_LENGTH << 1; - ntfs_read_attr(upcase, upcase->vol->at_data, 0, 0, &io); - upcase->vol->upcase_length = io.size >> 1; + ntfs_debug("vol->mft_zone_multiplier = 0x%x", + vol->mft_zone_multiplier); + vol->mft_zone_start = vol->mft_lcn; + vol->mft_zone_end += vol->mft_lcn; + ntfs_debug("vol->mft_zone_start = 0x%Lx", + (long long)vol->mft_zone_start); + ntfs_debug("vol->mft_zone_end = 0x%Lx", (long long)vol->mft_zone_end); + /* And another misplaced defaults setting. */ + if (!vol->on_errors) + vol->on_errors = ON_ERRORS_PANIC; + return TRUE; } -static int process_attrdef(ntfs_inode* attrdef, ntfs_u8* def) +/** + * load_and_init_upcase - load the upcase table for an ntfs volume + * @vol: ntfs super block describing device whose upcase to load + * + * Return TRUE on success or FALSE on error. 
+ */ +static BOOL load_and_init_upcase(ntfs_volume *vol) { - int type = NTFS_GETU32(def+0x80); - int check_type = 0; - ntfs_volume *vol = attrdef->vol; - ntfs_u16* name = (ntfs_u16*)def; - - if (!type) { - ntfs_debug(DEBUG_OTHER, "process_atrdef: finished processing " - "and returning 1\n"); - return 1; - } - if (ntfs_ua_strncmp(name, "$STANDARD_INFORMATION", 64) == 0) { - vol->at_standard_information = type; - check_type = 0x10; - } else if (ntfs_ua_strncmp(name, "$ATTRIBUTE_LIST", 64) == 0) { - vol->at_attribute_list = type; - check_type = 0x20; - } else if (ntfs_ua_strncmp(name, "$FILE_NAME", 64) == 0) { - vol->at_file_name = type; - check_type = 0x30; - } else if (ntfs_ua_strncmp(name, "$VOLUME_VERSION", 64) == 0) { - vol->at_volume_version = type; - check_type = 0x40; - } else if (ntfs_ua_strncmp(name, "$SECURITY_DESCRIPTOR", 64) == 0) { - vol->at_security_descriptor = type; - check_type = 0x50; - } else if (ntfs_ua_strncmp(name, "$VOLUME_NAME", 64) == 0) { - vol->at_volume_name = type; - check_type = 0x60; - } else if (ntfs_ua_strncmp(name, "$VOLUME_INFORMATION", 64) == 0) { - vol->at_volume_information = type; - check_type = 0x70; - } else if (ntfs_ua_strncmp(name, "$DATA", 64) == 0) { - vol->at_data = type; - check_type = 0x80; - } else if (ntfs_ua_strncmp(name, "$INDEX_ROOT", 64) == 0) { - vol->at_index_root = type; - check_type = 0x90; - } else if (ntfs_ua_strncmp(name, "$INDEX_ALLOCATION", 64) == 0) { - vol->at_index_allocation = type; - check_type = 0xA0; - } else if (ntfs_ua_strncmp(name, "$BITMAP", 64) == 0) { - vol->at_bitmap = type; - check_type = 0xB0; - } else if (ntfs_ua_strncmp(name, "$SYMBOLIC_LINK", 64) == 0 || - ntfs_ua_strncmp(name, "$REPARSE_POINT", 64) == 0) { - vol->at_symlink = type; - check_type = 0xC0; - } - if (check_type && check_type != type) { - ntfs_error("process_attrdef: unexpected type 0x%x for 0x%x\n", - type, check_type); - return -EINVAL; + struct super_block *sb = vol->sb; + struct inode *ino; + struct page *page; + 
unsigned long index, max_index; + unsigned int size; + int i, max; + + ntfs_debug("Entering."); + /* Read upcase table and setup vol->upcase and vol->upcase_len. */ + ino = ntfs_iget(sb, FILE_UpCase); + if (IS_ERR(ino) || is_bad_inode(ino)) { + if (!IS_ERR(ino)) + iput(ino); + goto upcase_failed; } - ntfs_debug(DEBUG_OTHER, "process_attrdef: found %s attribute of type " - "0x%x\n", check_type ? "known" : "unknown", type); - return 0; + /* + * The upcase size must not be above 64k Unicode characters, must not + * be zero and must be a multiple of sizeof(uchar_t). + */ + if (!ino->i_size || ino->i_size & (sizeof(uchar_t) - 1) || + ino->i_size > 64ULL * 1024 * sizeof(uchar_t)) + goto iput_upcase_failed; + vol->upcase = (uchar_t*)ntfs_malloc_nofs(ino->i_size); + if (!vol->upcase) + goto iput_upcase_failed; + index = 0; + max_index = ino->i_size >> PAGE_CACHE_SHIFT; + size = PAGE_CACHE_SIZE; + while (index < max_index) { + /* Read the upcase table and copy it into the linear buffer. */ +read_partial_upcase_page: + page = ntfs_map_page(ino->i_mapping, index); + if (IS_ERR(page)) + goto iput_upcase_failed; + memcpy((char*)vol->upcase + (index++ << PAGE_CACHE_SHIFT), + page_address(page), size); + ntfs_unmap_page(page); + }; + if (size == PAGE_CACHE_SIZE) { + size = ino->i_size & ~PAGE_CACHE_MASK; + if (size) + goto read_partial_upcase_page; + } + vol->upcase_len = ino->i_size >> UCHAR_T_SIZE_BITS; + ntfs_debug("Read %Lu bytes from $UpCase (expected %u bytes).", + ino->i_size, 64 * 1024 * sizeof(uchar_t)); + iput(ino); + down(&ntfs_lock); + if (!default_upcase) { + ntfs_debug("Using volume specified $UpCase since default is " + "not present."); + up(&ntfs_lock); + return TRUE; + } + max = default_upcase_len; + if (max > vol->upcase_len) + max = vol->upcase_len; + for (i = 0; i < max; i++) + if (vol->upcase[i] != default_upcase[i]) + break; + if (i == max) { + ntfs_free(vol->upcase); + vol->upcase = default_upcase; + vol->upcase_len = max; + ntfs_nr_upcase_users++; + 
up(&ntfs_lock); + ntfs_debug("Volume specified $UpCase matches default. Using " + "default."); + return TRUE; + } + up(&ntfs_lock); + ntfs_debug("Using volume specified $UpCase since it does not match " + "the default."); + return TRUE; +iput_upcase_failed: + iput(ino); + ntfs_free(vol->upcase); + vol->upcase = NULL; +upcase_failed: + down(&ntfs_lock); + if (default_upcase) { + vol->upcase = default_upcase; + vol->upcase_len = default_upcase_len; + ntfs_nr_upcase_users++; + up(&ntfs_lock); + ntfs_error(sb, "Failed to load $UpCase from the volume. Using " + "default."); + return TRUE; + } + up(&ntfs_lock); + ntfs_error(sb, "Failed to initialized upcase table."); + return FALSE; } -int ntfs_init_attrdef(ntfs_inode* attrdef) +/** + * load_system_files - open the system files using normal functions + * @vol: ntfs super block describing device whose system files to load + * + * Open the system files with normal access functions and complete setting up + * the ntfs super block @vol. + * + * Return TRUE on success or FALSE on error. + */ +static BOOL load_system_files(ntfs_volume *vol) { - ntfs_u8 *buf; - ntfs_io io; - __s64 offset; - unsigned i; - int error; - ntfs_attribute *data; - - ntfs_debug(DEBUG_BSD, "Entered ntfs_init_attrdef()\n"); - buf = ntfs_malloc(4050); /* 90*45 */ - if (!buf) - return -ENOMEM; - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - io.do_read = 1; - offset = 0; - data = ntfs_find_attr(attrdef, attrdef->vol->at_data, 0); - ntfs_debug(DEBUG_BSD, "In ntfs_init_attrdef() after call to " - "ntfs_find_attr.\n"); - if (!data) { - ntfs_free(buf); - return -EINVAL; + struct super_block *sb = vol->sb; + struct inode *tmp_ino; + MFT_RECORD *m; + VOLUME_INFORMATION *vi; + attr_search_context *ctx; + + ntfs_debug("Entering."); + + /* Get mft bitmap attribute inode. 
*/ + vol->mftbmp_ino = ntfs_attr_iget(vol->mft_ino, AT_BITMAP, NULL, 0); + if (IS_ERR(vol->mftbmp_ino)) { + ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); + return FALSE; + } + + /* Get mft mirror inode. */ + vol->mftmirr_ino = ntfs_iget(sb, FILE_MFTMirr); + if (IS_ERR(vol->mftmirr_ino) || is_bad_inode(vol->mftmirr_ino)) { + if (!IS_ERR(vol->mftmirr_ino)) + iput(vol->mftmirr_ino); + ntfs_error(sb, "Failed to load $MFTMirr."); + goto iput_mftbmp_err_out; + } + // FIXME: Compare mftmirr with mft and repair if appropriate and not + // a read-only mount. + + /* Read upcase table and setup vol->upcase and vol->upcase_len. */ + if (!load_and_init_upcase(vol)) + goto iput_mirr_err_out; + /* + * Get the cluster allocation bitmap inode and verify the size, no + * need for any locking at this stage as we are already running + * exclusively as we are mount in progress task. + */ + vol->lcnbmp_ino = ntfs_iget(sb, FILE_Bitmap); + if (IS_ERR(vol->lcnbmp_ino) || is_bad_inode(vol->lcnbmp_ino)) { + if (!IS_ERR(vol->lcnbmp_ino)) + iput(vol->lcnbmp_ino); + goto bitmap_failed; + } + if ((vol->nr_clusters + 7) >> 3 > vol->lcnbmp_ino->i_size) { + iput(vol->lcnbmp_ino); +bitmap_failed: + ntfs_error(sb, "Failed to load $Bitmap."); + goto iput_mirr_err_out; } - do { - io.param = buf; - io.size = 4050; - ntfs_debug(DEBUG_BSD, "In ntfs_init_attrdef() going to call " - "ntfs_readwrite_attr.\n"); - error = ntfs_readwrite_attr(attrdef, data, offset, &io); - ntfs_debug(DEBUG_BSD, "In ntfs_init_attrdef() after call to " - "ntfs_readwrite_attr.\n"); - for (i = 0; !error && i <= io.size - 0xA0; i += 0xA0) { - ntfs_debug(DEBUG_BSD, "In ntfs_init_attrdef() going " - "to call process_attrdef.\n"); - error = process_attrdef(attrdef, buf + i); - ntfs_debug(DEBUG_BSD, "In ntfs_init_attrdef() after " - "call to process_attrdef.\n"); - } - offset += 4096; - } while (!error && io.size); - ntfs_debug(DEBUG_BSD, "Exiting ntfs_init_attrdef()\n"); - ntfs_free(buf); - return error == 1 ? 
0 : error; + /* + * Get the volume inode and setup our cache of the volume flags and + * version. + */ + vol->vol_ino = ntfs_iget(sb, FILE_Volume); + if (IS_ERR(vol->vol_ino) || is_bad_inode(vol->vol_ino)) { + if (!IS_ERR(vol->vol_ino)) + iput(vol->vol_ino); +volume_failed: + ntfs_error(sb, "Failed to load $Volume."); + goto iput_lcnbmp_err_out; + } + m = map_mft_record(NTFS_I(vol->vol_ino)); + if (IS_ERR(m)) { +iput_volume_failed: + iput(vol->vol_ino); + goto volume_failed; + } + if (!(ctx = get_attr_search_ctx(NTFS_I(vol->vol_ino), m))) { + ntfs_error(sb, "Failed to get attribute search context."); + goto get_ctx_vol_failed; + } + if (!lookup_attr(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0, ctx) || + ctx->attr->non_resident || ctx->attr->flags) { +err_put_vol: + put_attr_search_ctx(ctx); +get_ctx_vol_failed: + unmap_mft_record(NTFS_I(vol->vol_ino)); + goto iput_volume_failed; + } + vi = (VOLUME_INFORMATION*)((char*)ctx->attr + + le16_to_cpu(ctx->attr->data.resident.value_offset)); + /* Some bounds checks. */ + if ((u8*)vi < (u8*)ctx->attr || (u8*)vi + + le32_to_cpu(ctx->attr->data.resident.value_length) > + (u8*)ctx->attr + le32_to_cpu(ctx->attr->length)) + goto err_put_vol; + /* Setup volume flags and version. */ + vol->vol_flags = vi->flags; + vol->major_ver = vi->major_ver; + vol->minor_ver = vi->minor_ver; + put_attr_search_ctx(ctx); + unmap_mft_record(NTFS_I(vol->vol_ino)); + printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, + vol->minor_ver); + /* + * Get the inode for the logfile and empty it if this is a read-write + * mount. + */ + tmp_ino = ntfs_iget(sb, FILE_LogFile); + if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { + if (!IS_ERR(tmp_ino)) + iput(tmp_ino); + ntfs_error(sb, "Failed to load $LogFile."); + // FIXME: We only want to empty the thing so pointless bailing + // out. Can recover/ignore. + goto iput_vol_err_out; + } + // FIXME: Empty the logfile, but only if not read-only. + // FIXME: What happens if someone remounts rw? 
We need to empty the file + // then. We need a flag to tell us whether we have done it already. + iput(tmp_ino); + /* + * Get the inode for the attribute definitions file and parse the + * attribute definitions. + */ + tmp_ino = ntfs_iget(sb, FILE_AttrDef); + if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { + if (!IS_ERR(tmp_ino)) + iput(tmp_ino); + ntfs_error(sb, "Failed to load $AttrDef."); + goto iput_vol_err_out; + } + // FIXME: Parse the attribute definitions. + iput(tmp_ino); + /* Get the root directory inode. */ + vol->root_ino = ntfs_iget(sb, FILE_root); + if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) { + if (!IS_ERR(vol->root_ino)) + iput(vol->root_ino); + ntfs_error(sb, "Failed to load root directory."); + goto iput_vol_err_out; + } + /* If on NTFS versions before 3.0, we are done. */ + if (vol->major_ver < 3) + return TRUE; + /* NTFS 3.0+ specific initialization. */ + /* Get the security descriptors inode. */ + vol->secure_ino = ntfs_iget(sb, FILE_Secure); + if (IS_ERR(vol->secure_ino) || is_bad_inode(vol->secure_ino)) { + if (!IS_ERR(vol->secure_ino)) + iput(vol->secure_ino); + ntfs_error(sb, "Failed to load $Secure."); + goto iput_root_err_out; + } + // FIXME: Initialize security. + /* Get the extended system files' directory inode. */ + tmp_ino = ntfs_iget(sb, FILE_Extend); + if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { + if (!IS_ERR(tmp_ino)) + iput(tmp_ino); + ntfs_error(sb, "Failed to load $Extend."); + goto iput_sec_err_out; + } + // FIXME: Do something. E.g. want to delete the $UsnJrnl if exists. + // Note we might be doing this at the wrong level; we might want to + // d_alloc_root() and then do a "normal" open(2) of $Extend\$UsnJrnl + // rather than using ntfs_iget here, as we don't know the inode number + // for the files in $Extend directory. 
+ iput(tmp_ino); + return TRUE; +iput_sec_err_out: + iput(vol->secure_ino); +iput_root_err_out: + iput(vol->root_ino); +iput_vol_err_out: + iput(vol->vol_ino); +iput_lcnbmp_err_out: + iput(vol->lcnbmp_ino); +iput_mirr_err_out: + iput(vol->mftmirr_ino); +iput_mftbmp_err_out: + iput(vol->mftbmp_ino); + return FALSE; } -/* ntfs_get_version will determine the NTFS version of the volume and will - * return the version in a BCD format, with the MSB being the major version - * number and the LSB the minor one. Otherwise return <0 on error. - * Example: version 3.1 will be returned as 0x0301. This has the obvious - * limitation of not coping with version numbers above 0x80 but that shouldn't - * be a problem... */ -int ntfs_get_version(ntfs_inode* volume) +/** + * ntfs_put_super - called by the vfs to unmount a volume + * @vfs_sb: vfs superblock of volume to unmount + * + * ntfs_put_super() is called by the VFS (from fs/super.c::do_umount()) when + * the volume is being unmounted (umount system call has been invoked) and it + * releases all inodes and memory belonging to the NTFS specific part of the + * super block. + */ +static void ntfs_put_super(struct super_block *vfs_sb) { - ntfs_attribute *volinfo; + ntfs_volume *vol = NTFS_SB(vfs_sb); - volinfo = ntfs_find_attr(volume, volume->vol->at_volume_information, 0); - if (!volinfo) - return -EINVAL; - if (!volinfo->resident) { - ntfs_error("Volume information attribute is not resident!\n"); - return -EINVAL; + ntfs_debug("Entering."); + + iput(vol->vol_ino); + vol->vol_ino = NULL; + + /* NTFS 3.0+ specific clean up. 
*/ + if (vol->major_ver >= 3) { + if (vol->secure_ino) { + iput(vol->secure_ino); + vol->secure_ino = NULL; + } } - return ((ntfs_u8*)volinfo->d.data)[8] << 8 | - ((ntfs_u8*)volinfo->d.data)[9]; -} -int ntfs_load_special_files(ntfs_volume *vol) -{ - int error; - ntfs_inode upcase, attrdef, volume; + iput(vol->root_ino); + vol->root_ino = NULL; - vol->mft_ino = (ntfs_inode*)ntfs_calloc(sizeof(ntfs_inode)); - vol->mftmirr = (ntfs_inode*)ntfs_calloc(sizeof(ntfs_inode)); - vol->bitmap = (ntfs_inode*)ntfs_calloc(sizeof(ntfs_inode)); - vol->ino_flags = 4 | 2 | 1; - error = -ENOMEM; - ntfs_debug(DEBUG_BSD, "Going to load MFT\n"); - if (!vol->mft_ino || (error = ntfs_init_inode(vol->mft_ino, vol, - FILE_Mft))) { - ntfs_error("Problem loading MFT\n"); - return error; - } - ntfs_debug(DEBUG_BSD, "Going to load MIRR\n"); - if ((error = ntfs_init_inode(vol->mftmirr, vol, FILE_MftMirr))) { - ntfs_error("Problem %d loading MFTMirr\n", error); - return error; - } - ntfs_debug(DEBUG_BSD, "Going to load BITMAP\n"); - if ((error = ntfs_init_inode(vol->bitmap, vol, FILE_BitMap))) { - ntfs_error("Problem loading Bitmap\n"); - return error; - } - ntfs_debug(DEBUG_BSD, "Going to load UPCASE\n"); - error = ntfs_init_inode(&upcase, vol, FILE_UpCase); - if (error) - return error; - ntfs_init_upcase(&upcase); - ntfs_clear_inode(&upcase); - ntfs_debug(DEBUG_BSD, "Going to load ATTRDEF\n"); - error = ntfs_init_inode(&attrdef, vol, FILE_AttrDef); - if (error) - return error; - error = ntfs_init_attrdef(&attrdef); - ntfs_clear_inode(&attrdef); - if (error) - return error; - - /* Check for NTFS version and if Win2k version (ie. 3.0+) do not allow - * write access since the driver write support is broken. */ - ntfs_debug(DEBUG_BSD, "Going to load VOLUME\n"); - error = ntfs_init_inode(&volume, vol, FILE_Volume); - if (error) - return error; - if ((error = ntfs_get_version(&volume)) >= 0x0300 && - !(NTFS_SB(vol)->s_flags & MS_RDONLY)) { - NTFS_SB(vol)->s_flags |= MS_RDONLY; - ntfs_error("Warning! 
NTFS volume version is Win2k+: Mounting " - "read-only\n"); - } - ntfs_clear_inode(&volume); - if (error < 0) - return error; - ntfs_debug(DEBUG_BSD, "NTFS volume is v%d.%d\n", error >> 8, - error & 0xff); - return 0; + down_write(&vol->lcnbmp_lock); + iput(vol->lcnbmp_ino); + vol->lcnbmp_ino = NULL; + up_write(&vol->lcnbmp_lock); + + iput(vol->mftmirr_ino); + vol->mftmirr_ino = NULL; + + down_write(&vol->mftbmp_lock); + iput(vol->mftbmp_ino); + vol->mftbmp_ino = NULL; + up_write(&vol->mftbmp_lock); + + iput(vol->mft_ino); + vol->mft_ino = NULL; + + vol->upcase_len = 0; + /* + * Decrease the number of mounts and destroy the global default upcase + * table if necessary. Also decrease the number of upcase users if we + * are a user. + */ + down(&ntfs_lock); + ntfs_nr_mounts--; + if (vol->upcase == default_upcase) { + ntfs_nr_upcase_users--; + vol->upcase = NULL; + } + if (!ntfs_nr_upcase_users && default_upcase) { + ntfs_free(default_upcase); + default_upcase = NULL; + } + if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) + free_compression_buffers(); + up(&ntfs_lock); + if (vol->upcase) { + ntfs_free(vol->upcase); + vol->upcase = NULL; + } + if (vol->nls_map) { + unload_nls(vol->nls_map); + vol->nls_map = NULL; + } + vfs_sb->u.generic_sbp = NULL; + kfree(vol); + return; } -int ntfs_release_volume(ntfs_volume *vol) +/** + * get_nr_free_clusters - return the number of free clusters on a volume + * @vol: ntfs volume for which to obtain free cluster count + * + * Calculate the number of free clusters on the mounted NTFS volume @vol. We + * actually calculate the number of clusters in use instead because this + * allows us to not care about partial pages as these will be just zero filled + * and hence not be counted as allocated clusters. + * + * The only particularity is that clusters beyond the end of the logical ntfs + * volume will be marked as allocated to prevent errors which means we have to + * discount those at the end. 
This is important as the cluster bitmap always + * has a size in multiples of 8 bytes, i.e. up to 63 clusters could be outside + * the logical volume and marked in use when they are not as they do not exist. + * + * If any pages cannot be read we assume all clusters in the erroring pages are + * in use. This means we return an underestimate on errors which is better than + * an overestimate. + */ +static s64 get_nr_free_clusters(ntfs_volume *vol) { - if (((vol->ino_flags & 1) == 1) && vol->mft_ino) { - ntfs_clear_inode(vol->mft_ino); - ntfs_free(vol->mft_ino); - vol->mft_ino = 0; - } - if (((vol->ino_flags & 2) == 2) && vol->mftmirr) { - ntfs_clear_inode(vol->mftmirr); - ntfs_free(vol->mftmirr); - vol->mftmirr = 0; - } - if (((vol->ino_flags & 4) == 4) && vol->bitmap) { - ntfs_clear_inode(vol->bitmap); - ntfs_free(vol->bitmap); - vol->bitmap = 0; + s64 nr_free = vol->nr_clusters; + u32 *kaddr; + struct address_space *mapping = vol->lcnbmp_ino->i_mapping; + filler_t *readpage = (filler_t*)mapping->a_ops->readpage; + struct page *page; + unsigned long index, max_index; + unsigned int max_size; + + ntfs_debug("Entering."); + /* Serialize accesses to the cluster bitmap. */ + down_read(&vol->lcnbmp_lock); + /* + * Convert the number of bits into bytes rounded up, then convert into + * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one + * full and one partial page max_index = 2. + */ + max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + /* Use multiples of 4 bytes. */ + max_size = PAGE_CACHE_SIZE >> 2; + ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%x.", + max_index, max_size); + for (index = 0UL; index < max_index; index++) { + unsigned int i; + /* + * Read the page from page cache, getting it from backing store + * if necessary, and increment the use count. + */ + page = read_cache_page(mapping, index, (filler_t*)readpage, + NULL); + /* Ignore pages which errored synchronously. 
*/ + if (IS_ERR(page)) { + ntfs_debug("Sync read_cache_page() error. Skipping " + "page (index 0x%lx).", index); + nr_free -= PAGE_CACHE_SIZE * 8; + continue; + } + wait_on_page(page); + /* Ignore pages which errored asynchronously. */ + if (!PageUptodate(page)) { + ntfs_debug("Async read_cache_page() error. Skipping " + "page (index 0x%lx).", index); + page_cache_release(page); + nr_free -= PAGE_CACHE_SIZE * 8; + continue; + } + kaddr = (u32*)kmap_atomic(page, KM_USER0); + /* + * For each 4 bytes, subtract the number of set bits. If this + * is the last page and it is partial we don't really care as + * it just means we do a little extra work but it won't affect + * the result as all out of range bytes are set to zero by + * ntfs_readpage(). + */ + for (i = 0; i < max_size; i++) + nr_free -= (s64)hweight32(kaddr[i]); + kunmap_atomic(kaddr, KM_USER0); + page_cache_release(page); } - ntfs_free(vol->mft); - ntfs_free(vol->upcase); - return 0; + ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); + /* + * Fixup for eventual bits outside logical ntfs volume (see function + * description above). + */ + if (vol->nr_clusters & 63) + nr_free += 64 - (vol->nr_clusters & 63); + up_read(&vol->lcnbmp_lock); + /* If errors occurred we may well have gone below zero, fix this. */ + if (nr_free < 0) + nr_free = 0; + ntfs_debug("Exiting."); + return nr_free; } -/* - * Writes the volume size (units of clusters) into vol_size. - * Returns 0 if successful or error. +/** + * __get_nr_free_mft_records - return the number of free inodes on a volume + * @vol: ntfs volume for which to obtain free inode count + * + * Calculate the number of free mft records (inodes) on the mounted NTFS + * volume @vol. We actually calculate the number of mft records in use instead + * because this allows us to not care about partial pages as these will be just + * zero filled and hence not be counted as allocated mft record.
+ * + * If any pages cannot be read we assume all mft records in the erroring pages + * are in use. This means we return an underestimate on errors which is better + * than an overestimate. + * + * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing. */ -int ntfs_get_volumesize(ntfs_volume *vol, ntfs_s64 *vol_size) +static unsigned long __get_nr_free_mft_records(ntfs_volume *vol) { - ntfs_io io; - char *cluster0; + s64 nr_free = vol->nr_mft_records; + u32 *kaddr; + struct address_space *mapping = vol->mftbmp_ino->i_mapping; + filler_t *readpage = (filler_t*)mapping->a_ops->readpage; + struct page *page; + unsigned long index, max_index; + unsigned int max_size; - if (!vol_size) - return -EFAULT; - cluster0 = ntfs_malloc(vol->cluster_size); - if (!cluster0) - return -ENOMEM; - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - io.param = cluster0; - io.do_read = 1; - io.size = vol->cluster_size; - ntfs_getput_clusters(vol, 0, 0, &io); - *vol_size = NTFS_GETU64(cluster0 + 0x28) >> - (ffs(NTFS_GETU8(cluster0 + 0xD)) - 1); - ntfs_free(cluster0); - return 0; -} - -static int nc[16]={4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0}; - -int ntfs_get_free_cluster_count(ntfs_inode *bitmap) -{ - ntfs_io io; - int offset, error, clusters; - unsigned char *bits = ntfs_malloc(2048); - if (!bits) - return -ENOMEM; - offset = clusters = 0; - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - while (1) { - register int i; - io.param = bits; - io.size = 2048; - error = ntfs_read_attr(bitmap, bitmap->vol->at_data, 0, offset, - &io); - if (error || io.size == 0) - break; - /* I never thought I would do loop unrolling some day */ - for (i = 0; i < io.size - 8; ) { - clusters+=nc[bits[i]>>4];clusters+=nc[bits[i++] & 0xF]; - clusters+=nc[bits[i]>>4];clusters+=nc[bits[i++] & 0xF]; - clusters+=nc[bits[i]>>4];clusters+=nc[bits[i++] & 0xF]; - clusters+=nc[bits[i]>>4];clusters+=nc[bits[i++] & 0xF]; - clusters+=nc[bits[i]>>4];clusters+=nc[bits[i++] & 0xF]; - 
clusters+=nc[bits[i]>>4];clusters+=nc[bits[i++] & 0xF]; - clusters+=nc[bits[i]>>4];clusters+=nc[bits[i++] & 0xF]; - clusters+=nc[bits[i]>>4];clusters+=nc[bits[i++] & 0xF]; + ntfs_debug("Entering."); + /* + * Convert the number of bits into bytes rounded up, then convert into + * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one + * full and one partial page max_index = 2. + */ + max_index = (((vol->nr_mft_records + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + /* Use multiples of 4 bytes. */ + max_size = PAGE_CACHE_SIZE >> 2; + ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " + "0x%x.", max_index, max_size); + for (index = 0UL; index < max_index; index++) { + unsigned int i; + /* + * Read the page from page cache, getting it from backing store + * if necessary, and increment the use count. + */ + page = read_cache_page(mapping, index, (filler_t*)readpage, + NULL); + /* Ignore pages which errored synchronously. */ + if (IS_ERR(page)) { + ntfs_debug("Sync read_cache_page() error. Skipping " + "page (index 0x%lx).", index); + nr_free -= PAGE_CACHE_SIZE * 8; + continue; } - while (i < io.size) { - clusters += nc[bits[i] >> 4]; - clusters += nc[bits[i++] & 0xF]; + wait_on_page(page); + /* Ignore pages which errored asynchronously. */ + if (!PageUptodate(page)) { + ntfs_debug("Async read_cache_page() error. Skipping " + "page (index 0x%lx).", index); + page_cache_release(page); + nr_free -= PAGE_CACHE_SIZE * 8; + continue; } - offset += io.size; - } - ntfs_free(bits); - return clusters; + kaddr = (u32*)kmap_atomic(page, KM_USER0); + /* + * For each 4 bytes, subtract the number of set bits. If this + * is the last page and it is partial we don't really care as + * it just means we do a little extra work but it won't affect + * the result as all out of range bytes are set to zero by + * ntfs_readpage(). 
+ */ + for (i = 0; i < max_size; i++) + nr_free -= (s64)hweight32(kaddr[i]); + kunmap_atomic(kaddr, KM_USER0); + page_cache_release(page); + } + ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", + index - 1); + /* If errors occurred we may well have gone below zero, fix this. */ + if (nr_free < 0) + nr_free = 0; + ntfs_debug("Exiting."); + return nr_free; } -/* - * Insert the fixups for the record. The number and location of the fixes - * is obtained from the record header but we double check with @rec_size and - * use that as the upper boundary, if necessary overwriting the count value in - * the record header. +/** + * ntfs_statfs - return information about mounted NTFS volume + * @sb: super block of mounted volume + * @sfs: statfs structure in which to return the information + * + * Return information about the mounted NTFS volume @sb in the statfs structure + * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is + * called). We interpret the values to be correct of the moment in time at + * which we are called. Most values are variable otherwise and this isn't just + * the free values but the totals as well. For example we can increase the + * total number of file nodes if we run out and we can keep doing this until + * there is no more space on the volume left at all. + * + * Called from vfs_statfs which is used to handle the statfs, fstatfs, and + * ustat system calls. * - * We return 0 on success or -1 if fixup header indicated the beginning of the - * update sequence array to be beyond the valid limit. + * Return 0 on success or -errno on error.
*/ -int ntfs_insert_fixups(unsigned char *rec, int rec_size) +static int ntfs_statfs(struct super_block *sb, struct statfs *sfs) { - int first; - int count; - int offset = -2; - ntfs_u16 fix; - - first = NTFS_GETU16(rec + 4); - count = (rec_size >> NTFS_SECTOR_BITS) + 1; - if (first + count * 2 > NTFS_SECTOR_SIZE - 2) { - printk(KERN_CRIT "NTFS: ntfs_insert_fixups() detected corrupt " - "NTFS record update sequence array position. - " - "Cannot hotfix.\n"); - return -1; - } - if (count != NTFS_GETU16(rec + 6)) { - printk(KERN_ERR "NTFS: ntfs_insert_fixups() detected corrupt " - "NTFS record update sequence array size. - " - "Applying hotfix.\n"); - NTFS_PUTU16(rec + 6, count); - } - fix = (NTFS_GETU16(rec + first) + 1) & 0xffff; - if (fix == 0xffff || !fix) - fix = 1; - NTFS_PUTU16(rec + first, fix); - count--; - while (count--) { - first += 2; - offset += NTFS_SECTOR_SIZE; - NTFS_PUTU16(rec + first, NTFS_GETU16(rec + offset)); - NTFS_PUTU16(rec + offset, fix); - } + ntfs_volume *vol = NTFS_SB(sb); + s64 size; + + ntfs_debug("Entering."); + /* Type of filesystem. */ + sfs->f_type = NTFS_SB_MAGIC; + /* Optimal transfer block size. */ + sfs->f_bsize = PAGE_CACHE_SIZE; + /* + * Total data blocks in file system in units of f_bsize and since + * inodes are also stored in data blocs ($MFT is a file) this is just + * the total clusters. + */ + sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >> + PAGE_CACHE_SHIFT; + /* Free data blocks in file system in units of f_bsize. */ + size = get_nr_free_clusters(vol) << vol->cluster_size_bits >> + PAGE_CACHE_SHIFT; + if (size < 0LL) + size = 0LL; + /* Free blocks avail to non-superuser, same as above on NTFS. */ + sfs->f_bavail = sfs->f_bfree = size; + /* Serialize accesses to the inode bitmap. */ + down_read(&vol->mftbmp_lock); + /* Total file nodes in file system (at this moment in time). */ + sfs->f_files = vol->mft_ino->i_size >> vol->mft_record_size_bits; + /* Free file nodes in fs (based on current total count). 
*/ + sfs->f_ffree = __get_nr_free_mft_records(vol); + up_read(&vol->mftbmp_lock); + /* + * File system id. This is extremely *nix flavour dependent and even + * within Linux itself all fs do their own thing. I interpret this to + * mean a unique id associated with the mounted fs and not the id + * associated with the file system driver, the latter is already given + * by the file system type in sfs->f_type. Thus we use the 64-bit + * volume serial number splitting it into two 32-bit parts. We enter + * the least significant 32-bits in f_fsid[0] and the most significant + * 32-bits in f_fsid[1]. + */ + sfs->f_fsid.val[0] = vol->serial_no & 0xffffffff; + sfs->f_fsid.val[1] = (vol->serial_no >> 32) & 0xffffffff; + /* Maximum length of filenames. */ + sfs->f_namelen = NTFS_MAX_NAME_LEN; return 0; } /** - * ntfs_allocate_clusters - allocate logical clusters on an ntfs volume - * @vol: volume on which to allocate clusters - * @location: preferred location for first allocated cluster - * @count: number of clusters to allocate - * @rl: address of pointer in which to return the allocated run list - * @rl_len: the number of elements returned in @*rl - * - * Allocate @*count clusters (LCNs), preferably beginning at @*location in the - * bitmap of the volume @vol. If @*location is -1, it does not matter where the - * clusters are. @rl is the address of a ntfs_runlist pointer which this - * function will allocate and fill with the runlist of the allocated clusters. - * It is the callers responsibility to ntfs_vfree() @*rl after she is finished - * with it. If the function was not successful, @*rl will be set to NULL. - * @*rl_len will contain the number of ntfs_runlist elements in @*rl or 0 if - * @*rl is NULL. - * - * Return 0 on success, or -errno on error. On success, @*location and @*count - * say what was really allocated. On -ENOSPC, @*location and @*count say what - * could have been allocated. 
If nothing could be allocated or a different - * error occured, @*location = -1 and @*count = 0. - * - * There are two data zones. First is the area between the end of the mft zone - * and the end of the volume, and second is the area between the start of the - * volume and the start of the mft zone. On unmodified/standard volumes, the - * second mft zone doesn't exist due to the mft zone being expanded to cover - * the start of volume in order to reserve space for the mft bitmap attribute. - * - * This is not the prettiest function but the complexity stems from the need of - * implementing the mft vs data zoned approach and from the fact that we have - * access to the lcn bitmap in portions of PAGE_SIZE bytes at a time, so we - * need to cope with crossing over boundaries of two pages. Further, the fact - * that the allocator allows for caller supplied hints as to the location of - * where allocation should begin and the fact that the allocator keeps track of - * where in the data zones the next natural allocation should occur, contribute - * to the complexity of the function. But it should all be worthwhile, because - * this allocator should: 1) be a full implementation of the MFT zone approach - * used by Windows, 2) cause reduction in fragmentation as much as possible, - * and 3) be speedy in allocations (the code is not optimized for speed, but - * the algorithm is, so further speed improvements are probably possible). - * - * FIXME: Really need finer-grained locking but this will do for the moment. I - * just want to kill all races and have a working allocator. When that is done, - * we can beautify... (AIA) - * - * FIXME: We should be monitoring cluster allocation and increment the MFT zone - * size dynamically but this is something for the future. We will just cause - * heavier fragmentation by not doing it and I am not even sure Windows would - * grow the MFT zone dynamically, so might even be correct not doing this. 
The - * overhead in doing dynamic MFT zone expansion would be very large and unlikely - * worth the effort. (AIA) - * - * TODO: I have added in double the required zone position pointer wrap around - * logic which can be optimized to having only one of the two logic sets. - * However, having the double logic will work fine, but if we have only one of - * the sets and we get it wrong somewhere, then we get into trouble, so - * removing the duplicate logic requires _very_ careful consideration of _all_ - * possible code paths. So at least for now, I am leaving the double logic - - * better safe than sorry... (AIA) + * Super operations for mount time when we don't have enough setup to use the + * proper functions. + */ +struct super_operations ntfs_mount_sops = { + .read_inode = ntfs_read_inode_mount, /* VFS: Load inode from disk, + called from iget(). */ + .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode + is removed from memory. */ +}; + +typedef void (*read_inode2_t)(struct inode *, void *); + +/** + * The complete super operations. + */ +struct super_operations ntfs_sops = { + .read_inode2 = (read_inode2_t)ntfs_read_locked_inode, /* VFS: Load + inode from disk, called from + ntfs_iget(). */ + //.dirty_inode = ntfs_dirty_inode, /* VFS: Called from + // __mark_inode_dirty(). */ + .put_inode = ntfs_put_inode, /* VFS: Called just before the + inode reference count is + decreased. */ + .put_super = ntfs_put_super, /* Syscall: umount. */ + .statfs = ntfs_statfs, /* Syscall: statfs */ + .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ + .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is + removed from memory. */ + .show_options = ntfs_show_options, /* Show mount options in + proc. 
*/ +}; + +/** + * ntfs_read_super - mount an ntfs file system + * @sb: super block of ntfs file system to mount + * @opt: string containing the mount options + * @silent: silence error output + * + * ntfs_read_super() is called by the VFS to mount the device described by @sb + * with the mount options in @opt with the NTFS file system. + * + * If @silent is true, remain silent even if errors are detected. This is used + * during bootup, when the kernel tries to mount the root file system with all + * registered file systems one after the other until one succeeds. This implies + * that all file systems except the correct one will quite correctly and + * expectedly return an error, but nobody wants to see error messages when in + * fact this is what is supposed to happen. + * + * NOTE: @sb->s_flags contains the mount options flags. */ -int ntfs_allocate_clusters(ntfs_volume *vol, ntfs_cluster_t *location, - ntfs_cluster_t *count, ntfs_runlist **rl, int *rl_len, - const NTFS_CLUSTER_ALLOCATION_ZONES zone) +static struct super_block *ntfs_read_super(struct super_block *sb, void *opt, const int silent) { - ntfs_runlist *rl2 = NULL, *rlt; - ntfs_attribute *data; - ntfs_cluster_t buf_pos, zone_start, zone_end, mft_zone_size; - ntfs_cluster_t lcn, last_read_pos, prev_lcn = (ntfs_cluster_t)0; - ntfs_cluster_t initial_location, prev_run_len = (ntfs_cluster_t)0; - ntfs_cluster_t clusters = (ntfs_cluster_t)0; - unsigned char *buf, *byte, bit, search_zone, done_zones; - unsigned char pass, need_writeback; - int rlpos = 0, rlsize, buf_size, err = 0; - ntfs_io io; - - ntfs_debug(DEBUG_OTHER, "%s(): Entering with *location = 0x%x, " - "*count = 0x%x, zone = %s_ZONE.\n", __FUNCTION__, - *location, *count, zone == DATA_ZONE ?
"DATA" : "MFT"); - buf = (char*)__get_free_page(GFP_NOFS); - if (!buf) { - ntfs_debug(DEBUG_OTHER, "%s(): Returning -ENOMEM.\n", - __FUNCTION__); - return -ENOMEM; - } - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - lock_kernel(); - /* Get the $DATA attribute of $Bitmap. */ - data = ntfs_find_attr(vol->bitmap, vol->at_data, 0); - if (!data) { - err = -EINVAL; - goto err_ret; - } - /* - * If no specific location was requested, use the current data zone - * position, otherwise use the requested location but make sure it lies - * outside the mft zone. Also set done_zones to 0 (no zones done) and - * pass depending on whether we are starting inside a zone (1) or - * at the beginning of a zone (2). If requesting from the MFT_ZONE, then - * we either start at the current position within the mft zone or at the - * specified position and if the latter is out of bounds then we start - * at the beginning of the MFT_ZONE. - */ - done_zones = 0; - pass = 1; - /* - * zone_start and zone_end are the current search range. search_zone - * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of - * volume) and 4 for data zone 2 (start of volume till start of mft - * zone). - */ - zone_start = *location; - if (zone_start < 0) { - if (zone == DATA_ZONE) - zone_start = vol->data1_zone_pos; - else - zone_start = vol->mft_zone_pos; - if (!zone_start) - /* - * Zone starts at beginning of volume which means a - * single pass is sufficient. - */ - pass = 2; - } else if (zone_start >= vol->mft_zone_start && zone_start < - vol->mft_zone_end && zone == DATA_ZONE) { - zone_start = vol->mft_zone_end; - pass = 2; - } else if ((zone_start < vol->mft_zone_start || zone_start >= - vol->mft_zone_end) && zone == MFT_ZONE) { - zone_start = vol->mft_lcn; - if (!vol->mft_zone_end) - zone_start = (ntfs_cluster_t)0; - pass = 2; - } - if (zone == DATA_ZONE) { - /* Skip searching the mft zone. 
*/ - done_zones |= 1; - if (zone_start >= vol->mft_zone_end) { - zone_end = vol->nr_clusters; - search_zone = 2; - } else { - zone_end = vol->mft_zone_start; - search_zone = 4; - } - } else /* if (zone == MFT_ZONE) */ { - zone_end = vol->mft_zone_end; - search_zone = 1; - } - /* - * buf_pos is the current bit position inside the bitmap. We use - * initial_location to determine whether or not to do a zone switch. - */ - buf_pos = initial_location = zone_start; - /* Loop until all clusters are allocated, i.e. clusters == 0. */ - clusters = *count; - rlpos = rlsize = 0; - if (*count <= 0) { - ntfs_debug(DEBUG_OTHER, "%s(): *count <= 0, " - "returning -EINVAL.\n", __FUNCTION__); - err = -EINVAL; - goto err_ret; - } - while (1) { - ntfs_debug(DEBUG_OTHER, "%s(): Start of outer while " - "loop: done_zones = 0x%x, search_zone = %i, " - "pass = %i, zone_start = 0x%x, zone_end = " - "0x%x, initial_location = 0x%x, buf_pos = " - "0x%x, rlpos = %i, rlsize = %i.\n", - __FUNCTION__, done_zones, search_zone, pass, - zone_start, zone_end, initial_location, buf_pos, - rlpos, rlsize); - /* Loop until we run out of free clusters. 
*/ - io.param = buf; - io.size = PAGE_SIZE; - io.do_read = 1; - last_read_pos = buf_pos >> 3; - ntfs_debug(DEBUG_OTHER, "%s(): last_read_pos = 0x%x.\n", - __FUNCTION__, last_read_pos); - err = ntfs_readwrite_attr(vol->bitmap, data, last_read_pos, - &io); - if (err) { - ntfs_debug(DEBUG_OTHER, "%s(): ntfs_read_attr failed " - "with error code %i, going to " - "err_ret.\n", __FUNCTION__, -err); - goto err_ret; - } - if (!io.size) { - ntfs_debug(DEBUG_OTHER, "%s(): !io.size, going to " - "zone_pass_done.\n", __FUNCTION__); - goto zone_pass_done; - } - buf_size = io.size << 3; - lcn = buf_pos & 7; - buf_pos &= ~7; - need_writeback = 0; - ntfs_debug(DEBUG_OTHER, "%s(): Before inner while " - "loop: buf_size = 0x%x, lcn = 0x%x, buf_pos = " - "0x%x, need_writeback = %i.\n", __FUNCTION__, - buf_size, lcn, buf_pos, need_writeback); - while (lcn < buf_size && lcn + buf_pos < zone_end) { - byte = buf + (lcn >> 3); - ntfs_debug(DEBUG_OTHER, "%s(): In inner while loop: " - "buf_size = 0x%x, lcn = 0x%x, buf_pos " - "= 0x%x, need_writeback = %i, byte ofs " - "= 0x%x, *byte = 0x%x.\n", __FUNCTION__, - buf_size, lcn, buf_pos, need_writeback, - lcn >> 3, *byte); - /* Skip full bytes. */ - if (*byte == 0xff) { - lcn += 8; - ntfs_debug(DEBUG_OTHER, "%s(): continuing while" - " loop 1.\n", __FUNCTION__); - continue; - } - bit = 1 << (lcn & 7); - ntfs_debug(DEBUG_OTHER, "%s(): bit = %i.\n", - __FUNCTION__, bit); - /* If the bit is already set, go onto the next one. */ - if (*byte & bit) { - lcn++; - ntfs_debug(DEBUG_OTHER, "%s(): continuing while" - " loop 2.\n", __FUNCTION__); - continue; - } - /* Allocate the bitmap bit. */ - *byte |= bit; - /* We need to write this bitmap buffer back to disk! */ - need_writeback = 1; - ntfs_debug(DEBUG_OTHER, "%s(): *byte = 0x%x, " - "need_writeback = %i.\n", __FUNCTION__, - *byte, need_writeback); - /* Reallocate memory if necessary. 
*/ - if ((rlpos + 2) * sizeof(ntfs_runlist) >= rlsize) { - ntfs_debug(DEBUG_OTHER, "%s(): Reallocating " - "space.\n", __FUNCTION__); - /* Setup first free bit return value. */ - if (!rl2) { - *location = lcn + buf_pos; - ntfs_debug(DEBUG_OTHER, "%s(): " - "*location = 0x%x.\n", - __FUNCTION__, - *location); - } - rlsize += PAGE_SIZE; - rlt = ntfs_vmalloc(rlsize); - if (!rlt) { - err = -ENOMEM; - ntfs_debug(DEBUG_OTHER, "%s(): Failed " - "to allocate memory, " - "returning -ENOMEM, " - "going to wb_err_ret.\n", - __FUNCTION__); - goto wb_err_ret; - } - if (rl2) { - ntfs_memcpy(rlt, rl2, rlsize - - PAGE_SIZE); - ntfs_vfree(rl2); - } - rl2 = rlt; - ntfs_debug(DEBUG_OTHER, "%s(): Reallocated " - "memory, rlsize = 0x%x.\n", - __FUNCTION__, rlsize); - } - /* - * Coalesce with previous run if adjacent LCNs. - * Otherwise, append a new run. - */ - ntfs_debug(DEBUG_OTHER, "%s(): Adding run (lcn 0x%x, " - "len 0x%x), prev_lcn = 0x%x, lcn = " - "0x%x, buf_pos = 0x%x, prev_run_len = " - "0x%x, rlpos = %i.\n", __FUNCTION__, - lcn + buf_pos, 1, prev_lcn, lcn, - buf_pos, prev_run_len, rlpos); - if (prev_lcn == lcn + buf_pos - prev_run_len && rlpos) { - ntfs_debug(DEBUG_OTHER, "%s(): Coalescing to " - "run (lcn 0x%x, len 0x%x).\n", - __FUNCTION__, - rl2[rlpos - 1].lcn, - rl2[rlpos - 1].len); - rl2[rlpos - 1].len = ++prev_run_len; - ntfs_debug(DEBUG_OTHER, "%s(): Run now (lcn " - "0x%x, len 0x%x), prev_run_len " - "= 0x%x.\n", __FUNCTION__, - rl2[rlpos - 1].lcn, - rl2[rlpos - 1].len, - prev_run_len); - } else { - if (rlpos) - ntfs_debug(DEBUG_OTHER, "%s(): Adding " - "new run, (previous " - "run lcn 0x%x, " - "len 0x%x).\n", - __FUNCTION__, - rl2[rlpos - 1].lcn, - rl2[rlpos - 1].len); - else - ntfs_debug(DEBUG_OTHER, "%s(): Adding " - "new run, is first " - "run.\n", __FUNCTION__); - rl2[rlpos].lcn = prev_lcn = lcn + buf_pos; - rl2[rlpos].len = prev_run_len = - (ntfs_cluster_t)1; - - rlpos++; - } - /* Done? 
*/ - if (!--clusters) { - ntfs_cluster_t tc; - /* - * Update the current zone position. Positions - * of already scanned zones have been updated - * during the respective zone switches. - */ - tc = lcn + buf_pos + 1; - ntfs_debug(DEBUG_OTHER, "%s(): Done. Updating " - "current zone position, tc = " - "0x%x, search_zone = %i.\n", - __FUNCTION__, tc, search_zone); - switch (search_zone) { - case 1: - ntfs_debug(DEBUG_OTHER, - "%s(): Before checks, " - "vol->mft_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->mft_zone_pos); - if (tc >= vol->mft_zone_end) { - vol->mft_zone_pos = - vol->mft_lcn; - if (!vol->mft_zone_end) - vol->mft_zone_pos = - (ntfs_cluster_t)0; - } else if ((initial_location >= - vol->mft_zone_pos || - tc > vol->mft_zone_pos) - && tc >= vol->mft_lcn) - vol->mft_zone_pos = tc; - ntfs_debug(DEBUG_OTHER, - "%s(): After checks, " - "vol->mft_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->mft_zone_pos); - break; - case 2: - ntfs_debug(DEBUG_OTHER, - "%s(): Before checks, " - "vol->data1_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->data1_zone_pos); - if (tc >= vol->nr_clusters) - vol->data1_zone_pos = - vol->mft_zone_end; - else if ((initial_location >= - vol->data1_zone_pos || - tc > vol->data1_zone_pos) - && tc >= vol->mft_zone_end) - vol->data1_zone_pos = tc; - ntfs_debug(DEBUG_OTHER, - "%s(): After checks, " - "vol->data1_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->data1_zone_pos); - break; - case 4: - ntfs_debug(DEBUG_OTHER, - "%s(): Before checks, " - "vol->data2_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->data2_zone_pos); - if (tc >= vol->mft_zone_start) - vol->data2_zone_pos = - (ntfs_cluster_t)0; - else if (initial_location >= - vol->data2_zone_pos || - tc > vol->data2_zone_pos) - vol->data2_zone_pos = tc; - ntfs_debug(DEBUG_OTHER, - "%s(): After checks, " - "vol->data2_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->data2_zone_pos); - break; - default: - BUG(); - } - ntfs_debug(DEBUG_OTHER, "%s(): Going to " - "done_ret.\n", __FUNCTION__); - 
goto done_ret; - } - lcn++; - } - buf_pos += buf_size; - ntfs_debug(DEBUG_OTHER, "%s(): After inner while " - "loop: buf_size = 0x%x, lcn = 0x%x, buf_pos = " - "0x%x, need_writeback = %i.\n", __FUNCTION__, - buf_size, lcn, buf_pos, need_writeback); - if (need_writeback) { - ntfs_debug(DEBUG_OTHER, "%s(): Writing back.\n", - __FUNCTION__); - need_writeback = 0; - io.param = buf; - io.do_read = 0; - err = ntfs_readwrite_attr(vol->bitmap, data, - last_read_pos, &io); - if (err) { - ntfs_error("%s(): Bitmap writeback failed " - "in read next buffer code " - "path with error code %i.\n", - __FUNCTION__, -err); - goto err_ret; - } - } - if (buf_pos < zone_end) { - ntfs_debug(DEBUG_OTHER, "%s(): Continuing " - "outer while loop, buf_pos = 0x%x, " - "zone_end = 0x%x.\n", __FUNCTION__, - buf_pos, zone_end); - continue; - } -zone_pass_done: /* Finished with the current zone pass. */ - ntfs_debug(DEBUG_OTHER, "%s(): At zone_pass_done, pass = %i.\n", - __FUNCTION__, pass); - if (pass == 1) { - /* - * Now do pass 2, scanning the first part of the zone - * we omitted in pass 1. - */ - pass = 2; - zone_end = zone_start; - switch (search_zone) { - case 1: /* mft_zone */ - zone_start = vol->mft_zone_start; - break; - case 2: /* data1_zone */ - zone_start = vol->mft_zone_end; - break; - case 4: /* data2_zone */ - zone_start = (ntfs_cluster_t)0; - break; - default: - BUG(); - } - /* Sanity check. 
*/ - if (zone_end < zone_start) - zone_end = zone_start; - buf_pos = zone_start; - ntfs_debug(DEBUG_OTHER, "%s(): Continuing " - "outer while loop, pass = 2, " - "zone_start = 0x%x, zone_end = 0x%x, " - "buf_pos = 0x%x.\n", __FUNCTION__, - zone_start, zone_end, buf_pos); - continue; - } /* pass == 2 */ -done_zones_check: - ntfs_debug(DEBUG_OTHER, "%s(): At done_zones_check, " - "search_zone = %i, done_zones before = 0x%x, " - "done_zones after = 0x%x.\n", __FUNCTION__, - search_zone, done_zones, done_zones | - search_zone); - done_zones |= search_zone; - if (done_zones < 7) { - ntfs_debug(DEBUG_OTHER, "%s(): Switching zone.\n", - __FUNCTION__); - /* Now switch to the next zone we haven't done yet. */ - pass = 1; - switch (search_zone) { - case 1: - ntfs_debug(DEBUG_OTHER, "%s(): Switching from " - "mft zone to data1 zone.\n", - __FUNCTION__); - /* Update mft zone position. */ - if (rlpos) { - ntfs_cluster_t tc; - ntfs_debug(DEBUG_OTHER, - "%s(): Before checks, " - "vol->mft_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->mft_zone_pos); - tc = rl2[rlpos - 1].lcn + - rl2[rlpos - 1].len; - if (tc >= vol->mft_zone_end) { - vol->mft_zone_pos = - vol->mft_lcn; - if (!vol->mft_zone_end) - vol->mft_zone_pos = - (ntfs_cluster_t)0; - } else if ((initial_location >= - vol->mft_zone_pos || - tc > vol->mft_zone_pos) - && tc >= vol->mft_lcn) - vol->mft_zone_pos = tc; - ntfs_debug(DEBUG_OTHER, - "%s(): After checks, " - "vol->mft_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->mft_zone_pos); - } - /* Switch from mft zone to data1 zone. */ -switch_to_data1_zone: search_zone = 2; - zone_start = initial_location = - vol->data1_zone_pos; - zone_end = vol->nr_clusters; - if (zone_start == vol->mft_zone_end) - pass = 2; - if (zone_start >= zone_end) { - vol->data1_zone_pos = zone_start = - vol->mft_zone_end; - pass = 2; - } - break; - case 2: - ntfs_debug(DEBUG_OTHER, "%s(): Switching from " - "data1 zone to data2 zone.\n", - __FUNCTION__); - /* Update data1 zone position. 
*/ - if (rlpos) { - ntfs_cluster_t tc; - ntfs_debug(DEBUG_OTHER, - "%s(): Before checks, " - "vol->data1_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->data1_zone_pos); - tc = rl2[rlpos - 1].lcn + - rl2[rlpos - 1].len; - if (tc >= vol->nr_clusters) - vol->data1_zone_pos = - vol->mft_zone_end; - else if ((initial_location >= - vol->data1_zone_pos || - tc > vol->data1_zone_pos) - && tc >= vol->mft_zone_end) - vol->data1_zone_pos = tc; - ntfs_debug(DEBUG_OTHER, - "%s(): After checks, " - "vol->data1_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->data1_zone_pos); - } - /* Switch from data1 zone to data2 zone. */ - search_zone = 4; - zone_start = initial_location = - vol->data2_zone_pos; - zone_end = vol->mft_zone_start; - if (!zone_start) - pass = 2; - if (zone_start >= zone_end) { - vol->data2_zone_pos = zone_start = - initial_location = - (ntfs_cluster_t)0; - pass = 2; - } - break; - case 4: - ntfs_debug(DEBUG_OTHER, "%s(): Switching from " - "data2 zone to data1 zone.\n", - __FUNCTION__); - /* Update data2 zone position. */ - if (rlpos) { - ntfs_cluster_t tc; - ntfs_debug(DEBUG_OTHER, - "%s(): Before checks, " - "vol->data2_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->data2_zone_pos); - tc = rl2[rlpos - 1].lcn + - rl2[rlpos - 1].len; - if (tc >= vol->mft_zone_start) - vol->data2_zone_pos = - (ntfs_cluster_t)0; - else if (initial_location >= - vol->data2_zone_pos || - tc > vol->data2_zone_pos) - vol->data2_zone_pos = tc; - ntfs_debug(DEBUG_OTHER, - "%s(): After checks, " - "vol->data2_zone_pos = " - "0x%x.\n", __FUNCTION__, - vol->data2_zone_pos); - } - /* Switch from data2 zone to data1 zone. */ - goto switch_to_data1_zone; /* See above. 
*/ - default: - BUG(); - } - ntfs_debug(DEBUG_OTHER, "%s(): After zone switch, " - "search_zone = %i, pass = %i, " - "initial_location = 0x%x, zone_start " - "= 0x%x, zone_end = 0x%x.\n", - __FUNCTION__, search_zone, pass, - initial_location, zone_start, zone_end); - buf_pos = zone_start; - if (zone_start == zone_end) { - ntfs_debug(DEBUG_OTHER, "%s(): Empty zone, " - "going to done_zones_check.\n", - __FUNCTION__); - /* Empty zone. Don't bother searching it. */ - goto done_zones_check; - } - ntfs_debug(DEBUG_OTHER, "%s(): Continuing outer while " - "loop.\n", __FUNCTION__); - continue; - } /* done_zones == 7 */ - ntfs_debug(DEBUG_OTHER, "%s(): All zones are finished.\n", - __FUNCTION__); - /* - * All zones are finished! If DATA_ZONE, shrink mft zone. If - * MFT_ZONE, we have really run out of space. - */ - mft_zone_size = vol->mft_zone_end - vol->mft_zone_start; - ntfs_debug(DEBUG_OTHER, "%s(): vol->mft_zone_start = 0x%x, " - "vol->mft_zone_end = 0x%x, mft_zone_size = " - "0x%x.\n", __FUNCTION__, vol->mft_zone_start, - vol->mft_zone_end, mft_zone_size); - if (zone == MFT_ZONE || mft_zone_size <= (ntfs_cluster_t)0) { - ntfs_debug(DEBUG_OTHER, "%s(): No free clusters left, " - "returning -ENOSPC, going to " - "fail_ret.\n", __FUNCTION__); - /* Really no more space left on device. */ - err = -ENOSPC; - goto fail_ret; - } /* zone == DATA_ZONE && mft_zone_size > 0 */ - ntfs_debug(DEBUG_OTHER, "%s(): Shrinking mft zone.\n", - __FUNCTION__); - zone_end = vol->mft_zone_end; - mft_zone_size >>= 1; - if (mft_zone_size > (ntfs_cluster_t)0) - vol->mft_zone_end = vol->mft_zone_start + mft_zone_size; - else /* mft zone and data2 zone no longer exist. 
*/ - vol->data2_zone_pos = vol->mft_zone_start = - vol->mft_zone_end = (ntfs_cluster_t)0; - if (vol->mft_zone_pos >= vol->mft_zone_end) { - vol->mft_zone_pos = vol->mft_lcn; - if (!vol->mft_zone_end) - vol->mft_zone_pos = (ntfs_cluster_t)0; - } - buf_pos = zone_start = initial_location = - vol->data1_zone_pos = vol->mft_zone_end; - search_zone = 2; - pass = 2; - done_zones &= ~2; - ntfs_debug(DEBUG_OTHER, "%s(): After shrinking mft " - "zone, mft_zone_size = 0x%x, " - "vol->mft_zone_start = 0x%x, vol->mft_zone_end " - "= 0x%x, vol->mft_zone_pos = 0x%x, search_zone " - "= 2, pass = 2, dones_zones = 0x%x, zone_start " - "= 0x%x, zone_end = 0x%x, vol->data1_zone_pos " - "= 0x%x, continuing outer while loop.\n", - __FUNCTION__, mft_zone_size, - vol->mft_zone_start, vol->mft_zone_end, - vol->mft_zone_pos, done_zones, zone_start, - zone_end, vol->data1_zone_pos); - } - ntfs_debug(DEBUG_OTHER, "%s(): After outer while loop.\n", - __FUNCTION__); -done_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At done_ret.\n", __FUNCTION__); - rl2[rlpos].lcn = (ntfs_cluster_t)-1; - rl2[rlpos].len = (ntfs_cluster_t)0; - *rl = rl2; - *rl_len = rlpos; - if (need_writeback) { - ntfs_debug(DEBUG_OTHER, "%s(): Writing back.\n", __FUNCTION__); - need_writeback = 0; - io.param = buf; - io.do_read = 0; - err = ntfs_readwrite_attr(vol->bitmap, data, last_read_pos, - &io); - if (err) { - ntfs_error("%s(): Bitmap writeback failed in done " - "code path with error code %i.\n", - __FUNCTION__, -err); - goto err_ret; - } - ntfs_debug(DEBUG_OTHER, "%s(): Wrote 0x%Lx bytes.\n", - __FUNCTION__, io.size); + extern int *blksize_size[]; + ntfs_volume *vol; + struct buffer_head *bh; + struct inode *tmp_ino; + int old_blocksize, result; + kdev_t dev = sb->s_dev; + + ntfs_debug("Entering."); +#ifndef NTFS_RW + sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; +#endif + /* Allocate a new ntfs_volume and place it in sb->u.generic_sbp. 
*/ + sb->u.generic_sbp = kmalloc(sizeof(ntfs_volume), GFP_NOFS); + vol = NTFS_SB(sb); + if (!vol) { + if (!silent) + ntfs_error(sb, "Allocation of NTFS volume structure " + "failed. Aborting mount..."); + goto err_out_now; + } + /* Initialize ntfs_volume structure. */ + memset(vol, 0, sizeof(ntfs_volume)); + vol->sb = sb; + init_MUTEX(&vol->inode_lock); + vol->upcase = NULL; + vol->mft_ino = NULL; + vol->mftbmp_ino = NULL; + init_rwsem(&vol->mftbmp_lock); + vol->mftmirr_ino = NULL; + vol->lcnbmp_ino = NULL; + init_rwsem(&vol->lcnbmp_lock); + vol->vol_ino = NULL; + vol->root_ino = NULL; + vol->secure_ino = NULL; + vol->uid = vol->gid = 0; + vol->flags = 0; + vol->on_errors = 0; + vol->mft_zone_multiplier = 0; + vol->nls_map = NULL; + + /* + * Default is group and other don't have any access to files or + * directories while owner has full access. Further, files by default + * are not executable but directories are of course browseable. + */ + vol->fmask = 0177; + vol->dmask = 0077; + + /* Important to get the mount options dealt with now. */ + if (!parse_options(vol, (char*)opt)) + goto err_out_now; + + /* + * TODO: Fail safety check. In the future we should really be able to + * cope with this being the case, but for now just bail out. + */ + if (get_hardsect_size(dev) > NTFS_BLOCK_SIZE) { + if (!silent) + ntfs_error(sb, "Device has unsupported hardsect_size."); + goto err_out_now; } -done_fail_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At done_fail_ret (follows done_ret).\n", - __FUNCTION__); - unlock_kernel(); - free_page((unsigned long)buf); - if (err) - ntfs_debug(DEBUG_FILE3, "%s(): Failed to allocate " - "clusters. Returning with error code %i.\n", - __FUNCTION__, -err); - ntfs_debug(DEBUG_OTHER, "%s(): Syncing $Bitmap inode.\n", __FUNCTION__); - if (ntfs_update_inode(vol->bitmap)) - ntfs_error("%s(): Failed to sync inode $Bitmap. 
" - "Continuing anyway.\n", __FUNCTION__); - ntfs_debug(DEBUG_OTHER, "%s(): Returning with code %i.\n", __FUNCTION__, - err); - return err; -fail_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At fail_ret.\n", __FUNCTION__); - if (rl2) { - if (err == -ENOSPC) { - /* Return first free lcn and count of free clusters. */ - *location = rl2[0].lcn; - *count -= clusters; - ntfs_debug(DEBUG_OTHER, "%s(): err = -ENOSPC, " - "*location = 0x%x, *count = 0x%x.\n", - __FUNCTION__, *location, *count); + + /* Setup the device access block size to NTFS_BLOCK_SIZE. */ + if (!blksize_size[major(dev)]) + old_blocksize = BLOCK_SIZE; + else + old_blocksize = blksize_size[major(dev)][minor(dev)]; + if (sb_set_blocksize(sb, NTFS_BLOCK_SIZE) != NTFS_BLOCK_SIZE) { + if (!silent) + ntfs_error(sb, "Unable to set block size."); + goto set_blk_size_err_out_now; + } + + /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */ + vol->nr_blocks = sb->s_bdev->bd_inode->i_size >> NTFS_BLOCK_SIZE_BITS; + + /* Read the boot sector and return unlocked buffer head to it. */ + if (!(bh = read_ntfs_boot_sector(sb, silent))) { + if (!silent) + ntfs_error(sb, "Not an NTFS volume."); + goto set_blk_size_err_out_now; + } + + /* + * Extract the data from the boot sector and setup the ntfs super block + * using it. + */ + result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data); + + brelse(bh); + + if (!result) { + if (!silent) + ntfs_error(sb, "Unsupported NTFS filesystem."); + goto set_blk_size_err_out_now; + } + + /* + * TODO: When we start coping with sector sizes different from + * NTFS_BLOCK_SIZE, we now probably need to set the blocksize of the + * device (probably to NTFS_BLOCK_SIZE). + */ + + /* Setup remaining fields in the super block. */ + sb->s_magic = NTFS_SB_MAGIC; + + /* + * Ntfs allows 63 bits for the file size, i.e. correct would be: + * sb->s_maxbytes = ~0ULL >> 1; + * But the kernel uses a long as the page cache page index which on + * 32-bit architectures is only 32-bits. 
MAX_LFS_FILESIZE is kernel + * defined to the maximum the page cache page index can cope with + * without overflowing the index or to 2^63 - 1, whichever is smaller. + */ + sb->s_maxbytes = MAX_LFS_FILESIZE; + + /* + * Now load the metadata required for the page cache and our address + * space operations to function. We do this by setting up a specialised + * read_inode method and then just calling the normal iget() to obtain + * the inode for $MFT which is sufficient to allow our normal inode + * operations and associated address space operations to function. + */ + /* + * Poison vol->mft_ino so we know whether iget() called into our + * ntfs_read_inode_mount() method. + */ +#define OGIN ((struct inode*)le32_to_cpu(0x4e49474f)) /* OGIN */ + vol->mft_ino = OGIN; + sb->s_op = &ntfs_mount_sops; + tmp_ino = iget(vol->sb, FILE_MFT); + if (!tmp_ino || tmp_ino != vol->mft_ino || is_bad_inode(tmp_ino)) { + if (!silent) + ntfs_error(sb, "Failed to load essential metadata."); + if (tmp_ino && vol->mft_ino == OGIN) + ntfs_error(sb, "BUG: iget() did not call " + "ntfs_read_inode_mount() method!\n"); + if (!tmp_ino) + goto cond_iput_mft_ino_err_out_now; + goto iput_tmp_ino_err_out_now; + } + /* + * Note: sb->s_op has already been set to &ntfs_sops by our specialized + * ntfs_read_inode_mount() method when it was invoked by iget(). + */ + down(&ntfs_lock); + /* + * The current mount is a compression user if the cluster size is + * less than or equal 4kiB. + */ + if (vol->cluster_size <= 4096 && !ntfs_nr_compression_users++) { + result = allocate_compression_buffers(); + if (result) { + ntfs_error(NULL, "Failed to allocate buffers " + "for compression engine."); + ntfs_nr_compression_users--; + up(&ntfs_lock); + goto iput_tmp_ino_err_out_now; } - /* Deallocate all allocated clusters. */ - ntfs_debug(DEBUG_OTHER, "%s(): Deallocating allocated " - "clusters.\n", __FUNCTION__); - ntfs_deallocate_clusters(vol, rl2, rlpos); - /* Free the runlist. 
*/ - ntfs_vfree(rl2); - } else { - if (err == -ENOSPC) { - /* Nothing free at all. */ - *location = vol->data1_zone_pos; /* Irrelevant... */ - *count = 0; - ntfs_debug(DEBUG_OTHER, "%s(): No space left at all, " - "err = -ENOSPC, *location = 0x%x, " - "*count = 0.\n", - __FUNCTION__, *location); + } + /* + * Increment the number of mounts and generate the global default + * upcase table if necessary. Also temporarily increment the number of + * upcase users to avoid race conditions with concurrent (u)mounts. + */ + if (!ntfs_nr_mounts++) + default_upcase = generate_default_upcase(); + ntfs_nr_upcase_users++; + + up(&ntfs_lock); + /* + * From now on, ignore @silent parameter. If we fail below this line, + * it will be due to a corrupt fs or a system error, so we report it. + */ + /* + * Open the system files with normal access functions and complete + * setting up the ntfs super block. + */ + if (!load_system_files(vol)) { + ntfs_error(sb, "Failed to load system files."); + goto unl_upcase_iput_tmp_ino_err_out_now; + } + if ((sb->s_root = d_alloc_root(vol->root_ino))) { + /* We increment i_count simulating an ntfs_iget(). */ + atomic_inc(&vol->root_ino->i_count); + ntfs_debug("Exiting, status successful."); + /* Release the default upcase if it has no users. 
*/ + down(&ntfs_lock); + if (!--ntfs_nr_upcase_users && default_upcase) { + ntfs_free(default_upcase); + default_upcase = NULL; } + up(&ntfs_lock); + goto ntfs_read_super_ret; } - *rl = NULL; - *rl_len = 0; - ntfs_debug(DEBUG_OTHER, "%s(): *rl = NULL, *rl_len = 0, " - "going to done_fail_ret.\n", __FUNCTION__); - goto done_fail_ret; -wb_err_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At wb_err_ret.\n", __FUNCTION__); - if (need_writeback) { - int __err; - ntfs_debug(DEBUG_OTHER, "%s(): Writing back.\n", __FUNCTION__); - io.param = buf; - io.do_read = 0; - __err = ntfs_readwrite_attr(vol->bitmap, data, last_read_pos, - &io); - if (__err) - ntfs_error("%s(): Bitmap writeback failed in error " - "code path with error code %i.\n", - __FUNCTION__, -__err); - need_writeback = 0; - } -err_ret: - ntfs_debug(DEBUG_OTHER, "%s(): At err_ret, *location = -1, " - "*count = 0, going to fail_ret.\n", __FUNCTION__); - *location = -1; - *count = 0; - goto fail_ret; + ntfs_error(sb, "Failed to allocate root directory."); + /* Clean up after the successful load_system_files() call from above. */ + iput(vol->vol_ino); + vol->vol_ino = NULL; + /* NTFS 3.0+ specific clean up. */ + if (vol->major_ver >= 3) { + iput(vol->secure_ino); + vol->secure_ino = NULL; + } + iput(vol->root_ino); + vol->root_ino = NULL; + iput(vol->lcnbmp_ino); + vol->lcnbmp_ino = NULL; + iput(vol->mftmirr_ino); + vol->mftmirr_ino = NULL; + iput(vol->mftbmp_ino); + vol->mftbmp_ino = NULL; + vol->upcase_len = 0; + if (vol->upcase != default_upcase) + ntfs_free(vol->upcase); + vol->upcase = NULL; + if (vol->nls_map) { + unload_nls(vol->nls_map); + vol->nls_map = NULL; + } + /* Error exit code path. */ +unl_upcase_iput_tmp_ino_err_out_now: + /* + * Decrease the number of mounts and destroy the global default upcase + * table if necessary. 
+ */ + down(&ntfs_lock); + ntfs_nr_mounts--; + if (!--ntfs_nr_upcase_users && default_upcase) { + ntfs_free(default_upcase); + default_upcase = NULL; + } + if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) + free_compression_buffers(); + up(&ntfs_lock); +iput_tmp_ino_err_out_now: + iput(tmp_ino); +cond_iput_mft_ino_err_out_now: + if (vol->mft_ino && vol->mft_ino != OGIN && vol->mft_ino != tmp_ino) { + iput(vol->mft_ino); + vol->mft_ino = NULL; + } +#undef OGIN + /* + * This is needed to get ntfs_clear_extent_inode() called for each + * inode we have ever called ntfs_iget()/iput() on, otherwise we A) + * leak resources and B) a subsequent mount fails automatically due to + * ntfs_iget() never calling down into our ntfs_read_locked_inode() + * method again... FIXME: Do we need to do this twice now because of + * attribute inodes? I think not, so leave as is for now... (AIA) + */ + if (invalidate_inodes(sb)) { + ntfs_error(sb, "Busy inodes left. This is most likely a NTFS " + "driver bug."); + /* Copied from fs/super.c. I just love this message. (-; */ + printk("NTFS: Busy inodes after umount. Self-destruct in 5 " + "seconds. Have a nice day...\n"); + } +set_blk_size_err_out_now: + sb_set_blocksize(sb, old_blocksize); + /* Errors at this stage are irrelevant. */ +err_out_now: + sb->u.generic_sbp = NULL; + kfree(vol); + sb = NULL; + ntfs_debug("Failed, returning -EINVAL."); +ntfs_read_super_ret: + return sb; } /* - * IMPORTANT: Caller has to hold big kernel lock or the race monster will come - * to get you! (-; - * TODO: Need our own lock for bitmap accesses but BKL is more secure for now, - * considering we might not have covered all places with a lock yet. In that - * case the BKL offers a one way exclusion which is better than no exclusion - * at all... 
(AIA) + * This is a slab cache to optimize allocations and deallocations of Unicode + * strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN + * (255) Unicode characters + a terminating NULL Unicode character. */ -static int ntfs_clear_bitrange(ntfs_inode *bitmap, - const ntfs_cluster_t start_bit, const ntfs_cluster_t count) -{ - ntfs_cluster_t buf_size, bit, nr_bits = count; - unsigned char *buf, *byte; - int err; - ntfs_io io; - - io.fn_put = ntfs_put; - io.fn_get = ntfs_get; - /* Calculate the required buffer size in bytes. */ - buf_size = (ntfs_cluster_t)((start_bit & 7) + nr_bits + 7) >> 3; - if (buf_size <= (ntfs_cluster_t)(64 * 1024)) - buf = ntfs_malloc(buf_size); - else - buf = ntfs_vmalloc(buf_size); - if (!buf) - return -ENOMEM; - /* Read the bitmap from the data attribute. */ - io.param = byte = buf; - io.size = buf_size; - err = ntfs_read_attr(bitmap, bitmap->vol->at_data, 0, start_bit >> 3, - &io); - if (err || io.size != buf_size) - goto err_out; - /* Now clear the bits in the read bitmap. */ - bit = start_bit & 7; - while (bit && nr_bits) { /* Process first partial byte, if present. */ - *byte &= ~(1 << bit++); - nr_bits--; - bit &= 7; - if (!bit) - byte++; - } - while (nr_bits >= 8) { /* Process full bytes. */ - *byte = 0; - nr_bits -= 8; - byte++; - } - bit = 0; - while (nr_bits) { /* Process last partial byte, if present. */ - *byte &= ~(1 << bit); - nr_bits--; - bit++; - } - /* Write the modified bitmap back to disk. */ - io.param = buf; - io.size = buf_size; - err = ntfs_write_attr(bitmap, bitmap->vol->at_data, 0, start_bit >> 3, - &io); -err_out: - if (buf_size <= (ntfs_cluster_t)(64 * 1024)) - ntfs_free(buf); - else - ntfs_vfree(buf); - if (!err && io.size != buf_size) - err = -EIO; - return err; -} +kmem_cache_t *ntfs_name_cache; + +/* Slab caches for efficient allocation/deallocation of of inodes. 
*/ +kmem_cache_t *ntfs_inode_cache; /* - * See comments for lack of zone adjustments below in the description of the - * function ntfs_deallocate_clusters(). + * Slab cache to optimize allocations and deallocations of attribute search + * contexts. */ -int ntfs_deallocate_cluster_run(const ntfs_volume *vol, - const ntfs_cluster_t lcn, const ntfs_cluster_t len) +kmem_cache_t *ntfs_attr_ctx_cache; + +/* A global default upcase table and a corresponding reference count. */ +wchar_t *default_upcase = NULL; +unsigned long ntfs_nr_upcase_users = 0; + +/* The number of mounted filesystems. */ +unsigned long ntfs_nr_mounts = 0; + +/* Driver wide semaphore. */ +DECLARE_MUTEX(ntfs_lock); + +/* Define the filesystem */ +static DECLARE_FSTYPE_DEV(ntfs_fs_type, "ntfs", ntfs_read_super); + +/* Stable names for the slab caches. */ +static const char *ntfs_attr_ctx_cache_name = "ntfs_actx_cache"; +static const char *ntfs_name_cache_name = "ntfs_name_cache"; +static const char *ntfs_inode_cache_name = "ntfs_inode_cache"; + +/** + * init_ntfs_fs - module initialisation: set up and register the NTFS driver + * + * Create the three slab caches, register the ntfs sysctls and then register + * the file system. If any step fails, everything set up before it is undone + * again. + * + * Return 0 on success or -errno on error. + */ +static int __init init_ntfs_fs(void) { - int err; + int err = 0; - lock_kernel(); - err = ntfs_clear_bitrange(vol->bitmap, lcn, len); - unlock_kernel(); + /* This may be ugly but it results in pretty output so who cares. 
(-8 */ + printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/" +#ifdef NTFS_RW + "W" +#else + "O" +#endif +#ifdef DEBUG + " DEBUG" +#endif +#ifdef MODULE + " MODULE" +#endif + "].\n"); + + ntfs_debug("Debug messages are enabled."); + + ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, + sizeof(attr_search_context), 0 /* offset */, + SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */); + if (!ntfs_attr_ctx_cache) { + printk(KERN_CRIT "NTFS: Failed to create %s!\n", + ntfs_attr_ctx_cache_name); + goto ctx_err_out; + } + + ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name, + (NTFS_MAX_NAME_LEN+1) * sizeof(uchar_t), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!ntfs_name_cache) { + printk(KERN_CRIT "NTFS: Failed to create %s!\n", + ntfs_name_cache_name); + goto name_err_out; + } + + ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name, + sizeof(ntfs_inode), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!ntfs_inode_cache) { + printk(KERN_CRIT "NTFS: Failed to create %s!\n", + ntfs_inode_cache_name); + goto inode_err_out; + } + + /* Register the ntfs sysctls. */ + err = ntfs_sysctl(1); + if (err) { + printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n"); + goto sysctl_err_out; + } + + err = register_filesystem(&ntfs_fs_type); + if (!err) { + ntfs_debug("NTFS driver registered successfully."); + return 0; /* Success! */ + } + printk(KERN_CRIT "NTFS: Failed to register NTFS file system driver!\n"); + + /* + * Unregister the ntfs sysctls again. They were registered above and + * must not be left behind when the driver fails to load. + */ + ntfs_sysctl(0); +sysctl_err_out: + kmem_cache_destroy(ntfs_inode_cache); +inode_err_out: + kmem_cache_destroy(ntfs_name_cache); +name_err_out: + kmem_cache_destroy(ntfs_attr_ctx_cache); +ctx_err_out: + if (!err) { + printk(KERN_CRIT "NTFS: Aborting NTFS file system driver " + "registration...\n"); + err = -ENOMEM; + } return err; } -/* - * This is inefficient, but logically trivial, so will do for now. 
Note, we - * do not touch the mft nor the data zones here because we want to minimize - * recycling of clusters to enhance the chances of data being undeleteable. - * Also we don't want the overhead. Instead we do one additional sweep of the - * current data zone during cluster allocation to check for freed clusters. - */ -int ntfs_deallocate_clusters(const ntfs_volume *vol, const ntfs_runlist *rl, - const int rl_len) +/** + * exit_ntfs_fs - module cleanup: unregister and tear down the NTFS driver + * + * Unregister the file system, destroy the three slab caches, complaining if + * any of them cannot be destroyed, and finally unregister the ntfs sysctls. + */ +static void __exit exit_ntfs_fs(void) { - int i, err; + int err = 0; - lock_kernel(); - for (i = err = 0; i < rl_len && !err; i++) - err = ntfs_clear_bitrange(vol->bitmap, rl[i].lcn, rl[i].len); - unlock_kernel(); - return err; + ntfs_debug("Unregistering NTFS driver."); + + unregister_filesystem(&ntfs_fs_type); + + /* (err = 1) is always true, it only records that a destroy failed. */ + if (kmem_cache_destroy(ntfs_inode_cache) && (err = 1)) + printk(KERN_CRIT "NTFS: Failed to destroy %s.\n", + ntfs_inode_cache_name); + if (kmem_cache_destroy(ntfs_name_cache) && (err = 1)) + printk(KERN_CRIT "NTFS: Failed to destroy %s.\n", + ntfs_name_cache_name); + if (kmem_cache_destroy(ntfs_attr_ctx_cache) && (err = 1)) + printk(KERN_CRIT "NTFS: Failed to destroy %s.\n", + ntfs_attr_ctx_cache_name); + if (err) + printk(KERN_CRIT "NTFS: This causes memory to leak! There is " + "probably a BUG in the driver! Please report " + "you saw this message to " + "linux-ntfs-dev@lists.sf.net\n"); + /* Unregister the ntfs sysctls. 
*/ + ntfs_sysctl(0); } +EXPORT_NO_SYMBOLS; +MODULE_AUTHOR("Anton Altaparmakov "); +MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2003 Anton Altaparmakov"); +MODULE_LICENSE("GPL"); +#ifdef DEBUG +MODULE_PARM(debug_msgs, "i"); +MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); +#endif + +module_init(init_ntfs_fs) +module_exit(exit_ntfs_fs) + diff -urN linux-2.4.24-vanilla/fs/ntfs/super.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/super.h --- linux-2.4.24-vanilla/fs/ntfs/super.h 2001-09-08 20:24:40.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/super.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,32 +0,0 @@ -/* - * super.h - Header file for super.c - * - * Copyright (C) 1995-1997 Martin von Löwis - * Copyright (C) 1996-1997 Régis Duchesne - * Copyright (c) 2001 Anton Altaparmakov - */ - -int ntfs_get_free_cluster_count(ntfs_inode *bitmap); - -int ntfs_get_volumesize(ntfs_volume *vol, __s64 *vol_size); - -int ntfs_init_volume(ntfs_volume *vol, char *boot); - -int ntfs_load_special_files(ntfs_volume *vol); - -int ntfs_release_volume(ntfs_volume *vol); - -int ntfs_insert_fixups(unsigned char *rec, int rec_size); - -int ntfs_fixup_record(char *record, char *magic, int size); - -int ntfs_allocate_clusters(ntfs_volume *vol, ntfs_cluster_t *location, - ntfs_cluster_t *count, ntfs_runlist **rl, int *rl_len, - const NTFS_CLUSTER_ALLOCATION_ZONES zone); - -int ntfs_deallocate_cluster_run(const ntfs_volume *vol, - const ntfs_cluster_t lcn, const ntfs_cluster_t len); - -int ntfs_deallocate_clusters(const ntfs_volume *vol, const ntfs_runlist *rl, - const int rl_len); - diff -urN linux-2.4.24-vanilla/fs/ntfs/support.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/support.c --- linux-2.4.24-vanilla/fs/ntfs/support.c 2002-02-25 19:38:09.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/support.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,316 +0,0 @@ -/* - * support.c - Specific support functions - * - * Copyright (C) 1997 Martin von Löwis - * Copyright (C) 1997 Régis Duchesne - * 
Copyright (C) 2001 Anton Altaparmakov (AIA) - */ - -#include "ntfstypes.h" -#include "struct.h" -#include "support.h" - -#include -#include -#include -#include -#include "util.h" -#include "inode.h" -#include "macros.h" -#include - -static char print_buf[1024]; - -#ifdef DEBUG -#include "sysctl.h" -#include - -/* Debugging output */ -void ntfs_debug(int mask, const char *fmt, ...) -{ - va_list ap; - - /* Filter it with the debugging level required */ - if (ntdebug & mask) { - va_start(ap,fmt); - strcpy(print_buf, KERN_DEBUG "NTFS: "); - vsprintf(print_buf + 9, fmt, ap); - printk(print_buf); - va_end(ap); - } -} - -#ifndef ntfs_malloc -/* Verbose kmalloc */ -void *ntfs_malloc(int size) -{ - void *ret; - - ret = kmalloc(size, GFP_KERNEL); - ntfs_debug(DEBUG_MALLOC, "Allocating %x at %p\n", size, ret); - - return ret; -} -#endif - -#ifndef ntfs_free -/* Verbose kfree() */ -void ntfs_free(void *block) -{ - ntfs_debug(DEBUG_MALLOC, "Freeing memory at %p\n", block); - kfree(block); -} -#endif -#else /* End of DEBUG functions. Normal ones below... */ - -#ifndef ntfs_malloc -void *ntfs_malloc(int size) -{ - return kmalloc(size, GFP_KERNEL); -} -#endif - -#ifndef ntfs_free -void ntfs_free(void *block) -{ - kfree(block); -} -#endif -#endif /* DEBUG */ - -void ntfs_bzero(void *s, int n) -{ - memset(s, 0, n); -} - -/* These functions deliberately return no value. It is dest, anyway, - and not used anywhere in the NTFS code. */ - -void ntfs_memcpy(void *dest, const void *src, ntfs_size_t n) -{ - memcpy(dest, src, n); -} - -void ntfs_memmove(void *dest, const void *src, ntfs_size_t n) -{ - memmove(dest, src, n); -} - -/* Warn that an error occurred. */ -void ntfs_error(const char *fmt,...) 
-{ - va_list ap; - - va_start(ap, fmt); - strcpy(print_buf, KERN_ERR "NTFS: "); - vsprintf(print_buf + 9, fmt, ap); - printk(print_buf); - va_end(ap); -} - -int ntfs_read_mft_record(ntfs_volume *vol, int mftno, char *buf) -{ - int error; - ntfs_io io; - - ntfs_debug(DEBUG_OTHER, "read_mft_record 0x%x\n", mftno); - if (mftno == FILE_Mft) - { - ntfs_memcpy(buf, vol->mft, vol->mft_record_size); - return 0; - } - if (!vol->mft_ino) - { - printk(KERN_ERR "NTFS: mft_ino is NULL. Something is terribly " - "wrong here!\n"); - return -ENODATA; - } - io.fn_put = ntfs_put; - io.fn_get = 0; - io.param = buf; - io.size = vol->mft_record_size; - ntfs_debug(DEBUG_OTHER, "read_mft_record: calling ntfs_read_attr with: " - "mftno = 0x%x, vol->mft_record_size_bits = 0x%x, " - "mftno << vol->mft_record_size_bits = 0x%Lx\n", mftno, - vol->mft_record_size_bits, - (__s64)mftno << vol->mft_record_size_bits); - error = ntfs_read_attr(vol->mft_ino, vol->at_data, NULL, - (__s64)mftno << vol->mft_record_size_bits, &io); - if (error || (io.size != vol->mft_record_size)) { - ntfs_debug(DEBUG_OTHER, "read_mft_record: read 0x%x failed " - "(%d,%d,%d)\n", mftno, error, io.size, - vol->mft_record_size); - return error ? error : -ENODATA; - } - ntfs_debug(DEBUG_OTHER, "read_mft_record: finished read 0x%x\n", mftno); - if (!ntfs_check_mft_record(vol, buf)) { - /* FIXME: This is incomplete behaviour. We might be able to - * recover at this stage. ntfs_check_mft_record() is too - * conservative at aborting it's operations. It is OK for - * now as we just can't handle some on disk structures - * this way. 
(AIA) */ - printk(KERN_WARNING "NTFS: Invalid MFT record for 0x%x\n", mftno); - return -EIO; - } - ntfs_debug(DEBUG_OTHER, "read_mft_record: Done 0x%x\n", mftno); - return 0; -} - -int ntfs_getput_clusters(ntfs_volume *vol, int cluster, ntfs_size_t start_offs, - ntfs_io *buf) -{ - struct super_block *sb = NTFS_SB(vol); - struct buffer_head *bh; - int length = buf->size; - int error = 0; - ntfs_size_t to_copy; - - ntfs_debug(DEBUG_OTHER, "%s_clusters %d %d %d\n", - buf->do_read ? "get" : "put", cluster, start_offs, length); - to_copy = vol->cluster_size - start_offs; - while (length) { - if (!(bh = sb_bread(sb, cluster))) { - ntfs_debug(DEBUG_OTHER, "%s failed\n", - buf->do_read ? "Reading" : "Writing"); - error = -EIO; - goto error_ret; - } - if (to_copy > length) - to_copy = length; - lock_buffer(bh); - if (buf->do_read) { - buf->fn_put(buf, bh->b_data + start_offs, to_copy); - unlock_buffer(bh); - } else { - buf->fn_get(bh->b_data + start_offs, buf, to_copy); - mark_buffer_dirty(bh); - unlock_buffer(bh); - /* - * Note: We treat synchronous IO on a per volume basis - * disregarding flags of individual inodes. This can - * lead to some strange write ordering effects upon a - * remount with a change in the sync flag but it should - * not break anything. [Except if the system crashes - * at that point in time but there would be more thigs - * to worry about than that in that case...]. 
(AIA) - */ - if (sb->s_flags & MS_SYNCHRONOUS) { - ll_rw_block(WRITE, 1, &bh); - wait_on_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) { - printk(KERN_ERR "IO error syncing NTFS " - "cluster [%s:%i]\n", - bdevname(sb->s_dev), cluster); - brelse(bh); - error = -EIO; - goto error_ret; - } - } - } - brelse(bh); - length -= to_copy; - start_offs = 0; - to_copy = vol->cluster_size; - cluster++; - } -error_ret: - return error; -} - -ntfs_time64_t ntfs_now(void) -{ - return ntfs_unixutc2ntutc(CURRENT_TIME); -} - -int ntfs_dupuni2map(ntfs_volume *vol, ntfs_u16 *in, int in_len, char **out, - int *out_len) -{ - int i, o, chl, chi; - char *result, *buf, charbuf[NLS_MAX_CHARSET_SIZE]; - struct nls_table *nls = vol->nls_map; - - result = ntfs_malloc(in_len + 1); - if (!result) - return -ENOMEM; - *out_len = in_len; - for (i = o = 0; i < in_len; i++) { - /* FIXME: Byte order? */ - wchar_t uni = in[i]; - if ((chl = nls->uni2char(uni, charbuf, - NLS_MAX_CHARSET_SIZE)) > 0) { - /* Adjust result buffer. */ - if (chl > 1) { - buf = ntfs_malloc(*out_len + chl - 1); - if (!buf) { - i = -ENOMEM; - goto err_ret; - } - memcpy(buf, result, o); - ntfs_free(result); - result = buf; - *out_len += (chl - 1); - } - for (chi = 0; chi < chl; chi++) - result[o++] = charbuf[chi]; - } else { - /* Invalid character. */ - printk(KERN_ERR "NTFS: Unicode name contains a " - "character that cannot be converted " - "to chosen character set. 
Remount " - "with utf8 encoding and this should " - "work.\n"); - i = -EILSEQ; - goto err_ret; - } - } - result[*out_len] = '\0'; - *out = result; - return 0; -err_ret: - ntfs_free(result); - *out_len = 0; - *out = NULL; - return i; -} - -int ntfs_dupmap2uni(ntfs_volume *vol, char* in, int in_len, ntfs_u16 **out, - int *out_len) -{ - int i, o; - ntfs_u16 *result; - struct nls_table *nls = vol->nls_map; - - *out = result = ntfs_malloc(2 * in_len); - if (!result) { - *out_len = 0; - return -ENOMEM; - } - *out_len = in_len; - for (i = o = 0; i < in_len; i++, o++) { - wchar_t uni; - int charlen; - - charlen = nls->char2uni(&in[i], in_len - i, &uni); - if (charlen < 0) { - i = charlen; - goto err_ret; - } - *out_len -= charlen - 1; - i += charlen - 1; - /* FIXME: Byte order? */ - result[o] = uni; - if (!result[o]) { - i = -EILSEQ; - goto err_ret; - } - } - return 0; -err_ret: - printk(KERN_ERR "NTFS: Name contains a character that cannot be " - "converted to Unicode.\n"); - ntfs_free(result); - *out_len = 0; - *out = NULL; - return i; -} - diff -urN linux-2.4.24-vanilla/fs/ntfs/support.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/support.h --- linux-2.4.24-vanilla/fs/ntfs/support.h 2002-02-25 19:38:09.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/support.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,89 +0,0 @@ -/* - * support.h - Header file for specific support.c - * - * Copyright (C) 1997 Régis Duchesne - * Copyright (c) 2001 Anton Altaparmakov (AIA) - */ - -/* Debug levels */ -#define DEBUG_OTHER 1 -#define DEBUG_MALLOC 2 -#define DEBUG_BSD 4 -#define DEBUG_LINUX 8 -#define DEBUG_DIR1 16 -#define DEBUG_DIR2 32 -#define DEBUG_DIR3 64 -#define DEBUG_FILE1 128 -#define DEBUG_FILE2 256 -#define DEBUG_FILE3 512 -#define DEBUG_NAME1 1024 -#define DEBUG_NAME2 2048 - -#ifdef DEBUG -void ntfs_debug(int mask, const char *fmt, ...); -#else -#define ntfs_debug(mask, fmt...) 
do {} while (0) -#endif - -#include -#include - -#define ntfs_malloc(size) kmalloc(size, GFP_KERNEL) - -#define ntfs_free(ptr) kfree(ptr) - -/** - * ntfs_vmalloc - allocate memory in multiples of pages - * @size number of bytes to allocate - * - * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and - * returns a pointer to the allocated memory. - * - * If there was insufficient memory to complete the request, return NULL. - */ -static inline void *ntfs_vmalloc(unsigned long size) -{ - if (size <= PAGE_SIZE) { - if (size) { - /* kmalloc() has per-CPU caches so if faster for now. */ - return kmalloc(PAGE_SIZE, GFP_NOFS); - /* return (void *)__get_free_page(GFP_NOFS | - __GFP_HIGHMEM); */ - } - BUG(); - } - if (size >> PAGE_SHIFT < num_physpages) - return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); - return NULL; -} - -static inline void ntfs_vfree(void *addr) -{ - if ((unsigned long)addr < VMALLOC_START) { - return kfree(addr); - /* return free_page((unsigned long)addr); */ - } - vfree(addr); -} - -void ntfs_bzero(void *s, int n); - -void ntfs_memcpy(void *dest, const void *src, ntfs_size_t n); - -void ntfs_memmove(void *dest, const void *src, ntfs_size_t n); - -void ntfs_error(const char *fmt,...); - -int ntfs_read_mft_record(ntfs_volume *vol, int mftno, char *buf); - -int ntfs_getput_clusters(ntfs_volume *pvol, int cluster, ntfs_size_t offs, - ntfs_io *buf); - -ntfs_time64_t ntfs_now(void); - -int ntfs_dupuni2map(ntfs_volume *vol, ntfs_u16 *in, int in_len, char **out, - int *out_len); - -int ntfs_dupmap2uni(ntfs_volume *vol, char* in, int in_len, ntfs_u16 **out, - int *out_len); - diff -urN linux-2.4.24-vanilla/fs/ntfs/sysctl.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/sysctl.c --- linux-2.4.24-vanilla/fs/ntfs/sysctl.c 2001-07-16 23:14:10.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/sysctl.c 2004-01-21 14:28:25.000000000 +0000 @@ -1,55 +1,88 @@ /* - * sysctl.c - System control stuff + * sysctl.c - Code for sysctl handling in NTFS 
Linux kernel driver. Part of + * the Linux-NTFS project. Adapted from the old NTFS driver, + * Copyright (C) 1997 Martin von Löwis, Régis Duchesne. * - * Copyright (C) 1997 Martin von Löwis - * Copyright (C) 1997 Régis Duchesne + * Copyright (c) 2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "sysctl.h" - #ifdef DEBUG -#include + +#include + +#ifdef CONFIG_SYSCTL + +#include #include -int ntdebug = 0; +#include "sysctl.h" +#include "debug.h" + +#include "kcompat.h" -/* Add or remove the debug sysctl - * Is this really the only file system with sysctls ? +#define FS_NTFS 1 + +/* Definition of the ntfs sysctl. */ +static ctl_table ntfs_sysctls[] = { + { FS_NTFS, "ntfs-debug", /* Binary and text IDs. */ + &debug_msgs,sizeof(debug_msgs), /* Data pointer and size. */ + 0644, NULL, &proc_dointvec }, /* Mode, child, proc handler. */ + { 0 } +}; + +/* Define the parent directory /proc/sys/fs. */ +static ctl_table sysctls_root[] = { + { CTL_FS, "fs", NULL, 0, 0555, ntfs_sysctls }, + { 0 } +}; + +/* Storage for the sysctls header. 
*/ +static struct ctl_table_header *sysctls_root_table = NULL; + +/** + * ntfs_sysctl - add or remove the debug sysctl + * @add: add (1) or remove (0) the sysctl + * + * Add or remove the debug sysctl. Return 0 on success or -errno on error. */ -void ntfs_sysctl(int add) +int ntfs_sysctl(int add) { -#define FS_NTFS 1 - /* Definition of the sysctl */ - static ctl_table ntfs_sysctls[]={ - {FS_NTFS, /* ID */ - "ntfs-debug", /* name in /proc */ - &ntdebug,sizeof(ntdebug), /* data ptr, data size */ - 0644, /* mode */ - 0, /* child */ - proc_dointvec, /* proc handler */ - 0, /* strategy */ - 0, /* proc control block */ - 0,0}, /* extra */ - {0} - }; - /* Define the parent file : /proc/sys/fs */ - static ctl_table sysctls_root[]={ - {CTL_FS, - "fs", - NULL,0, - 0555, - ntfs_sysctls}, - {0} - }; - static struct ctl_table_header *sysctls_root_header = NULL; - - if(add){ - if(!sysctls_root_header) - sysctls_root_header = register_sysctl_table(sysctls_root, 0); - } else if(sysctls_root_header) { - unregister_sysctl_table(sysctls_root_header); - sysctls_root_header = NULL; + if (add) { + BUG_ON(sysctls_root_table); + sysctls_root_table = register_sysctl_table(sysctls_root, 0); + if (!sysctls_root_table) + return -ENOMEM; +#ifdef CONFIG_PROC_FS + /* + * If the proc file system is in use and we are a module, need + * to set the owner of our proc entry to our module. In the + * non-modular case, THIS_MODULE is NULL, so this is ok. 
+ */ + ntfs_sysctls[0].de->owner = THIS_MODULE; +#endif + } else { + BUG_ON(!sysctls_root_table); + unregister_sysctl_table(sysctls_root_table); + sysctls_root_table = NULL; } + return 0; } + +#endif /* CONFIG_SYSCTL */ #endif /* DEBUG */ diff -urN linux-2.4.24-vanilla/fs/ntfs/sysctl.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/sysctl.h --- linux-2.4.24-vanilla/fs/ntfs/sysctl.h 2001-07-16 23:14:10.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/sysctl.h 2004-01-21 14:31:49.000000000 +0000 @@ -1,17 +1,43 @@ /* - * sysctl.h - Header file for sysctl.c + * sysctl.h - Defines for sysctl handling in NTFS Linux kernel driver. Part of + * the Linux-NTFS project. Adapted from the old NTFS driver, + * Copyright (C) 1997 Martin von Löwis, Régis Duchesne. + * + * Copyright (c) 2002 Anton Altaparmakov. * - * Copyright (C) 1997 Martin von Löwis - * Copyright (C) 1997 Régis Duchesne + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifdef DEBUG - extern int ntdebug; +#ifndef _LINUX_NTFS_SYSCTL_H +#define _LINUX_NTFS_SYSCTL_H + +#include - void ntfs_sysctl(int add); +#if (DEBUG && CONFIG_SYSCTL) + +extern int ntfs_sysctl(int add); - #define SYSCTL(x) ntfs_sysctl(x) #else - #define SYSCTL(x) -#endif /* DEBUG */ + +/* Just return success. */ +static inline int ntfs_sysctl(int add) +{ + return 0; +} + +#endif /* DEBUG && CONFIG_SYSCTL */ +#endif /* _LINUX_NTFS_SYSCTL_H */ diff -urN linux-2.4.24-vanilla/fs/ntfs/time.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/time.c --- linux-2.4.24-vanilla/fs/ntfs/time.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/time.c 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,81 @@ +/* + * time.c - NTFS time conversion functions. Part of the Linux-NTFS project. + * + * Copyright (c) 2001 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include /* For CURRENT_TIME. 
*/ +#include /* For do_div(). */ + +#include "ntfs.h" + +#define NTFS_TIME_OFFSET ((s64)(369 * 365 + 89) * 24 * 3600 * 10000000) + +/** + * utc2ntfs - convert Linux time to NTFS time + * @time: Linux time to convert to NTFS + * + * Convert the Linux time @time to its corresponding NTFS time and return that + * in little endian format. + * + * Linux stores time in a long at present and measures it as the number of + * 1-second intervals since 1st January 1970, 00:00:00 UTC. + * + * NTFS uses Microsoft's standard time format which is stored in a s64 and is + * measured as the number of 100 nano-second intervals since 1st January 1601, + * 00:00:00 UTC. + */ +inline s64 utc2ntfs(const time_t time) +{ + /* Convert to 100ns intervals and then add the NTFS time offset. */ + return cpu_to_sle64((s64)time * 10000000 + NTFS_TIME_OFFSET); +} + +/** + * get_current_ntfs_time - get the current time in little endian NTFS format + * + * Get the current time from the Linux kernel, convert it to its corresponding + * NTFS time and return that in little endian format. + */ +inline s64 get_current_ntfs_time(void) +{ + return utc2ntfs(CURRENT_TIME); +} + +/** + * ntfs2utc - convert NTFS time to Linux time + * @time: NTFS time (little endian) to convert to Linux + * + * Convert the little endian NTFS time @time to its corresponding Linux time + * and return that in cpu format. + * + * Linux stores time in a long at present and measures it as the number of + * 1-second intervals since 1st January 1970, 00:00:00 UTC. + * + * NTFS uses Microsoft's standard time format which is stored in a s64 and is + * measured as the number of 100 nano-second intervals since 1st January 1601, + * 00:00:00 UTC. + */ +inline time_t ntfs2utc(const s64 time) +{ + /* Subtract the NTFS time offset, then convert to 1s intervals. 
*/ + s64 t = sle64_to_cpu(time) - NTFS_TIME_OFFSET; + do_div(t, 10000000); + return (time_t)t; +} + diff -urN linux-2.4.24-vanilla/fs/ntfs/types.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/types.h --- linux-2.4.24-vanilla/fs/ntfs/types.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/types.h 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,84 @@ +/* + * types.h - Defines for NTFS Linux kernel driver specific types. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_TYPES_H +#define _LINUX_NTFS_TYPES_H + +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) +#define SN(X) X /* Struct Name */ +#define SC(P,N) P.N /* ShortCut: Prefix, Name */ +#else +#define SN(X) +#define SC(P,N) N +#endif + +/* 2-byte Unicode character type. */ +typedef u16 uchar_t; +#define UCHAR_T_SIZE_BITS 1 + +/* + * Clusters are signed 64-bit values on NTFS volumes. We define two types, LCN + * and VCN, to allow for type checking and better code readability. 
+ */ +typedef s64 VCN; +typedef s64 LCN; + +/** + * run_list_element - in memory vcn to lcn mapping array element + * @vcn: starting vcn of the current array element + * @lcn: starting lcn of the current array element + * @length: length in clusters of the current array element + * + * The last vcn (in fact the last vcn + 1) is reached when length == 0. + * + * When lcn == -1 this means that the count vcns starting at vcn are not + * physically allocated (i.e. this is a hole / data is sparse). + */ +typedef struct { /* In memory vcn to lcn mapping structure element. */ + VCN vcn; /* vcn = Starting virtual cluster number. */ + LCN lcn; /* lcn = Starting logical cluster number. */ + s64 length; /* Run length in clusters. */ +} run_list_element; + +/** + * run_list - in memory vcn to lcn mapping array including a read/write lock + * @rl: pointer to an array of run list elements + * @lock: read/write semaphore for serializing access to @rl + * + */ +typedef struct { + run_list_element *rl; + struct rw_semaphore lock; +} run_list; + +typedef enum { + FALSE = 0, + TRUE = 1 +} BOOL; + +typedef enum { + CASE_SENSITIVE = 0, + IGNORE_CASE = 1, +} IGNORE_CASE_BOOL; + +#endif /* _LINUX_NTFS_TYPES_H */ + diff -urN linux-2.4.24-vanilla/fs/ntfs/unistr.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/unistr.c --- linux-2.4.24-vanilla/fs/ntfs/unistr.c 2001-08-28 14:57:18.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/unistr.c 2004-01-21 14:28:25.000000000 +0000 @@ -1,7 +1,7 @@ /* - * unistr.c - Unicode string handling. Part of the Linux-NTFS project. + * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project. * - * Copyright (c) 2000,2001 Anton Altaparmakov.
+ * Copyright (c) 2001-2003 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -19,17 +19,21 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include -#include +#include "ntfs.h" -#include "unistr.h" -#include "macros.h" +/* + * IMPORTANT + * ========= + * + * All these routines assume that the Unicode characters are in little endian + * encoding inside the strings!!! + */ /* * This is used by the name collation functions to quickly determine what * characters are (in)valid. */ -const __u8 legal_ansi_char_array[0x40] = { +static const u8 legal_ansi_char_array[0x40] = { 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, @@ -57,45 +61,46 @@ * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE, * the @upcase table is used to performa a case insensitive comparison. */ -int ntfs_are_names_equal(wchar_t *s1, size_t s1_len, - wchar_t *s2, size_t s2_len, int ic, - wchar_t *upcase, __u32 upcase_size) +BOOL ntfs_are_names_equal(const uchar_t *s1, size_t s1_len, + const uchar_t *s2, size_t s2_len, + const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_size) { if (s1_len != s2_len) - return 0; - if (!ic) - return memcmp(s1, s2, s1_len << 1) ? 0: 1; - return ntfs_wcsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? 
0: 1; + return FALSE; + if (ic == CASE_SENSITIVE) + return !ntfs_ucsncmp(s1, s2, s1_len); + return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size); } /** * ntfs_collate_names - collate two Unicode names - * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE) - * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE) * @name1: first Unicode name to compare * @name2: second Unicode name to compare - * @ic: either CASE_SENSITIVE or IGNORE_CASE * @err_val: if @name1 contains an invalid character return this value + * @ic: either CASE_SENSITIVE or IGNORE_CASE + * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE) + * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE) * * ntfs_collate_names collates two Unicode names and returns: * * -1 if the first name collates before the second one, * 0 if the names match, * 1 if the second name collates before the first one, or - * @ec if an invalid character is encountered in @name1 during the comparison. + * @err_val if an invalid character is found in @name1 during the comparison. * * The following characters are considered invalid: '"', '*', '<', '>' and '?'. 
*/ -int ntfs_collate_names(wchar_t *upcase, __u32 upcase_len, - wchar_t *name1, __u32 name1_len, - wchar_t *name2, __u32 name2_len, - int ic, int err_val) +int ntfs_collate_names(const uchar_t *name1, const u32 name1_len, + const uchar_t *name2, const u32 name2_len, + const int err_val, const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_len) { - __u32 cnt, min_len; - wchar_t c1, c2; + u32 cnt, min_len; + uchar_t c1, c2; min_len = name1_len; - if (min_len > name2_len) + if (name1_len > name2_len) min_len = name2_len; for (cnt = 0; cnt < min_len; ++cnt) { c1 = le16_to_cpu(*name1++); @@ -112,8 +117,6 @@ return -1; if (c1 > c2) return 1; - ++name1; - ++name2; } if (name1_len < name2_len) return -1; @@ -127,7 +130,39 @@ } /** - * ntfs_wcsncasecmp - compare two little endian Unicode strings, ignoring case + * ntfs_ucsncmp - compare two little endian Unicode strings + * @s1: first string + * @s2: second string + * @n: maximum unicode characters to compare + * + * Compare the first @n characters of the Unicode strings @s1 and @s2. + * The strings are in little endian format and appropriate le16_to_cpu() + * conversion is performed on non-little endian machines. + * + * The function returns an integer less than, equal to, or greater than zero + * if @s1 (or the first @n Unicode characters thereof) is found, respectively, + * to be less than, to match, or be greater than @s2.
+ */ +int ntfs_ucsncmp(const uchar_t *s1, const uchar_t *s2, size_t n) +{ + uchar_t c1, c2; + size_t i; + + for (i = 0; i < n; ++i) { + c1 = le16_to_cpu(s1[i]); + c2 = le16_to_cpu(s2[i]); + if (c1 < c2) + return -1; + if (c1 > c2) + return 1; + if (!c1) + break; + } + return 0; +} + +/** + * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case * @s1: first string * @s2: second string * @n: maximum unicode characters to compare @@ -144,10 +179,10 @@ * if @s1 (or the first @n Unicode characters thereof) is found, respectively, * to be less than, to match, or be greater than @s2. */ -int ntfs_wcsncasecmp(wchar_t *s1, wchar_t *s2, size_t n, - wchar_t *upcase, __u32 upcase_size) +int ntfs_ucsncasecmp(const uchar_t *s1, const uchar_t *s2, size_t n, + const uchar_t *upcase, const u32 upcase_size) { - wchar_t c1, c2; + uchar_t c1, c2; size_t i; for (i = 0; i < n; ++i) { @@ -165,3 +200,184 @@ return 0; } +void ntfs_upcase_name(uchar_t *name, u32 name_len, const uchar_t *upcase, + const u32 upcase_len) +{ + u32 i; + uchar_t u; + + for (i = 0; i < name_len; i++) + if ((u = le16_to_cpu(name[i])) < upcase_len) + name[i] = upcase[u]; +} + +void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr, + const uchar_t *upcase, const u32 upcase_len) +{ + ntfs_upcase_name((uchar_t*)&file_name_attr->file_name, + file_name_attr->file_name_length, upcase, upcase_len); +} + +int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1, + FILE_NAME_ATTR *file_name_attr2, + const int err_val, const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_len) +{ + return ntfs_collate_names((uchar_t*)&file_name_attr1->file_name, + file_name_attr1->file_name_length, + (uchar_t*)&file_name_attr2->file_name, + file_name_attr2->file_name_length, + err_val, ic, upcase, upcase_len); +} + +/** + * ntfs_nlstoucs - convert NLS string to little endian Unicode string + * @vol: ntfs volume which we are working with + * @ins: input NLS string buffer + * @ins_len: length of 
input string in bytes + * @outs: on return contains the allocated output Unicode string buffer + * + * Convert the input string @ins, which is in whatever format the loaded NLS + * map dictates, into a little endian, 2-byte Unicode string. + * + * This function allocates the string and the caller is responsible for + * calling kmem_cache_free(ntfs_name_cache, @outs); when finished with it. + * + * On success the function returns the number of Unicode characters written to + * the output string *@outs (>= 0), not counting the terminating Unicode NULL + * character. *@outs is set to the allocated output string buffer. + * + * On error, a negative number corresponding to the error code is returned. In + * that case the output string is not allocated. The contents of *@outs are + * then undefined. + * + * This might look a bit odd due to fast path optimization... + */ +int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, + const int ins_len, uchar_t **outs) +{ + struct nls_table *nls = vol->nls_map; + uchar_t *ucs; + wchar_t wc; + int i, o, wc_len; + + /* We don't trust outside sources.
*/ + if (ins) { + ucs = (uchar_t*)kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS); + if (ucs) { + for (i = o = 0; i < ins_len; i += wc_len) { + wc_len = nls->char2uni(ins + i, ins_len - i, + &wc); + if (wc_len >= 0) { + if (wc) { + ucs[o++] = cpu_to_le16(wc); + continue; + } /* else (!wc) */ + break; + } /* else (wc_len < 0) */ + goto conversion_err; + } + ucs[o] = cpu_to_le16('\0'); + *outs = ucs; + return o; + } /* else (!ucs) */ + ntfs_error(vol->sb, "Failed to allocate name from " + "ntfs_name_cache!"); + return -ENOMEM; + } /* else (!ins) */ + ntfs_error(NULL, "Received NULL pointer."); + return -EINVAL; +conversion_err: + ntfs_error(vol->sb, "Name using character set %s contains characters " + "that cannot be converted to Unicode.", nls->charset); + kmem_cache_free(ntfs_name_cache, ucs); + return -EILSEQ; +} + +/** + * ntfs_ucstonls - convert little endian Unicode string to NLS string + * @vol: ntfs volume which we are working with + * @ins: input Unicode string buffer + * @ins_len: length of input string in Unicode characters + * @outs: on return contains the (allocated) output NLS string buffer + * @outs_len: length of output string buffer in bytes + * + * Convert the input little endian, 2-byte Unicode string @ins, of length + * @ins_len into the string format dictated by the loaded NLS. + * + * If *@outs is NULL, this function allocates the string and the caller is + * responsible for calling kfree(*@outs); when finished with it. + * + * On success the function returns the number of bytes written to the output + * string *@outs (>= 0), not counting the terminating NULL byte. If the output + * string buffer was allocated, *@outs is set to it. + * + * On error, a negative number corresponding to the error code is returned. In + * that case the output string is not allocated. The contents of *@outs are + * then undefined. + * + * This might look a bit odd due to fast path optimization...
+ */ +int ntfs_ucstonls(const ntfs_volume *vol, const uchar_t *ins, + const int ins_len, unsigned char **outs, int outs_len) +{ + struct nls_table *nls = vol->nls_map; + unsigned char *ns; + int i, o, ns_len, wc; + + /* We don't trust outside sources. */ + if (ins) { + ns = *outs; + ns_len = outs_len; + if (ns && !ns_len) { + wc = -ENAMETOOLONG; + goto conversion_err; + } + if (!ns) { + ns_len = ins_len * NLS_MAX_CHARSET_SIZE; + ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS); + if (!ns) + goto mem_err_out; + } + for (i = o = 0; i < ins_len; i++) { +retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, + ns_len - o); + if (wc > 0) { + o += wc; + continue; + } else if (!wc) + break; + else if (wc == -ENAMETOOLONG && ns != *outs) { + unsigned char *tc; + /* Grow in multiples of 64 bytes. */ + tc = (unsigned char*)kmalloc((ns_len + 64) & + ~63, GFP_NOFS); + if (tc) { + memcpy(tc, ns, ns_len); + ns_len = ((ns_len + 64) & ~63) - 1; + kfree(ns); + ns = tc; + goto retry; + } /* No memory so goto conversion_error; */ + } /* wc < 0, real error. */ + goto conversion_err; + } + ns[o] = '\0'; + *outs = ns; + return o; + } /* else (!ins) */ + ntfs_error(vol->sb, "Received NULL pointer."); + return -EINVAL; +conversion_err: + ntfs_error(vol->sb, "Unicode name contains characters that cannot be " + "converted to character set %s.", nls->charset); + if (ns != *outs) + kfree(ns); + if (wc != -ENAMETOOLONG) + wc = -EILSEQ; + return wc; +mem_err_out: + ntfs_error(vol->sb, "Failed to allocate name!"); + return -ENOMEM; +} + diff -urN linux-2.4.24-vanilla/fs/ntfs/unistr.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/unistr.h --- linux-2.4.24-vanilla/fs/ntfs/unistr.h 2001-08-28 14:57:18.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/unistr.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,44 +0,0 @@ -/* - * unistr.h - Exports for unicode string handling. Part of the Linux-NTFS - * project. - * - * Copyright (c) 2000,2001 Anton Altaparmakov. 
- * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _LINUX_NTFS_UNISTR_H -#define _LINUX_NTFS_UNISTR_H - -#include -#include - -extern const __u8 legal_ansi_char_array[0x40]; - -int ntfs_are_names_equal(wchar_t *s1, size_t s1_len, - wchar_t *s2, size_t s2_len, int ic, - wchar_t *upcase, __u32 upcase_size); - -int ntfs_collate_names(wchar_t *upcase, __u32 upcase_len, - wchar_t *name1, __u32 name1_len, - wchar_t *name2, __u32 name2_len, - int ic, int err_val); - -int ntfs_wcsncasecmp(wchar_t *s1, wchar_t *s2, size_t n, - wchar_t *upcase, __u32 upcase_size); - -#endif /* defined _LINUX_NTFS_UNISTR_H */ - diff -urN linux-2.4.24-vanilla/fs/ntfs/upcase.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/upcase.c --- linux-2.4.24-vanilla/fs/ntfs/upcase.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/upcase.c 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,90 @@ +/* + * upcase.c - Generate the full NTFS Unicode upcase table in little endian. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001 Richard Russon + * Copyright (c) 2001-2003 Anton Altaparmakov + * + * Modified for mkntfs inclusion 9 June 2001 by Anton Altaparmakov. 
+ * Modified for kernel inclusion 10 September 2001 by Anton Altaparmakov. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS source + * in the file COPYING); if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "ntfs.h" + +uchar_t *generate_default_upcase(void) +{ + static const int uc_run_table[][3] = { /* Start, End, Add */ + {0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74}, + {0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86}, + {0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100}, + {0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128}, + {0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112}, + {0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126}, + {0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8}, + {0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8}, + {0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8}, + {0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7}, + {0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16}, + {0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26}, + {0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32}, + {0} + }; + + static const int uc_dup_table[][2] = { /* Start, End */ + {0x0100, 0x012F},
{0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC}, + {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB}, + {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5}, + {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9}, + {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95}, + {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9}, + {0} + }; + + static const int uc_word_table[][2] = { /* Offset, Value */ + {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196}, + {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C}, + {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D}, + {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F}, + {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9}, + {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE}, + {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7}, + {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197}, + {0} + }; + + int i, r; + uchar_t *uc; + + uc = ntfs_malloc_nofs(default_upcase_len * sizeof(uchar_t)); + if (!uc) + return uc; + memset(uc, 0, default_upcase_len * sizeof(uchar_t)); + for (i = 0; i < default_upcase_len; i++) + uc[i] = cpu_to_le16(i); + for (r = 0; uc_run_table[r][0]; r++) + for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) + uc[i] = cpu_to_le16((le16_to_cpu(uc[i]) + + uc_run_table[r][2])); + for (r = 0; uc_dup_table[r][0]; r++) + for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) + uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1); + for (r = 0; uc_word_table[r][0]; r++) + uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]); + return uc; +} + diff -urN linux-2.4.24-vanilla/fs/ntfs/util.c linux-2.4.24-ntfs-2.1.6a/fs/ntfs/util.c --- linux-2.4.24-vanilla/fs/ntfs/util.c 2001-08-14 00:40:19.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/util.c 1970-01-01 
01:00:00.000000000 +0100 @@ -1,265 +0,0 @@ -/* - * util.c - Miscellaneous support - * - * Copyright (C) 1997,1999 Martin von Löwis - * Copyright (C) 1997 Régis Duchesne - * Copyright (C) 2001 Anton Altaparmakov (AIA) - * - * The utf8 routines are copied from Python wstrop module. - */ - -#include "ntfstypes.h" -#include "struct.h" -#include "util.h" -#include -#include -#include /* For do_div(). */ -#include "support.h" - -/* - * Converts a single wide character to a sequence of utf8 bytes. - * The character is represented in host byte order. - * Returns the number of bytes, or 0 on error. - */ -static int to_utf8(ntfs_u16 c, unsigned char *buf) -{ - if (c == 0) - return 0; /* No support for embedded 0 runes. */ - if (c < 0x80) { - if (buf) - buf[0] = (unsigned char)c; - return 1; - } - if (c < 0x800) { - if (buf) { - buf[0] = 0xc0 | (c >> 6); - buf[1] = 0x80 | (c & 0x3f); - } - return 2; - } - /* c < 0x10000 */ - if (buf) { - buf[0] = 0xe0 | (c >> 12); - buf[1] = 0x80 | ((c >> 6) & 0x3f); - buf[2] = 0x80 | (c & 0x3f); - } - return 3; -} - -/* - * Decodes a sequence of utf8 bytes into a single wide character. - * The character is returned in host byte order. - * Returns the number of bytes consumed, or 0 on error. - */ -static int from_utf8(const unsigned char *str, ntfs_u16 *c) -{ - int l = 0, i; - - if (*str < 0x80) { - *c = *str; - return 1; - } - if (*str < 0xc0) /* Lead byte must not be 10xxxxxx. */ - return 0; /* Is c0 a possible lead byte? */ - if (*str < 0xe0) { /* 110xxxxx */ - *c = *str & 0x1f; - l = 2; - } else if (*str < 0xf0) { /* 1110xxxx */ - *c = *str & 0xf; - l = 3; - } else if (*str < 0xf8) { /* 11110xxx */ - *c = *str & 7; - l = 4; - } else /* We don't support characters above 0xFFFF in NTFS. */ - return 0; - for (i = 1; i < l; i++) { - /* All other bytes must be 10xxxxxx. */ - if ((str[i] & 0xc0) != 0x80) - return 0; - *c <<= 6; - *c |= str[i] & 0x3f; - } - return l; -} - -/* - * Converts wide string to UTF-8. 
Expects two in- and two out-parameters. - * Returns 0 on success, or error code. - * The caller has to free the result string. - */ -static int ntfs_dupuni2utf8(ntfs_u16 *in, int in_len, char **out, int *out_len) -{ - int i, tmp; - int len8; - unsigned char *result; - - ntfs_debug(DEBUG_NAME1, "converting l = %d\n", in_len); - /* Count the length of the resulting UTF-8. */ - for (i = len8 = 0; i < in_len; i++) { - tmp = to_utf8(NTFS_GETU16(in + i), 0); - if (!tmp) - /* Invalid character. */ - return -EILSEQ; - len8 += tmp; - } - *out = result = ntfs_malloc(len8 + 1); /* allow for zero-termination */ - if (!result) - return -ENOMEM; - result[len8] = '\0'; - *out_len = len8; - for (i = len8 = 0; i < in_len; i++) - len8 += to_utf8(NTFS_GETU16(in + i), result + len8); - ntfs_debug(DEBUG_NAME1, "result %p:%s\n", result, result); - return 0; -} - -/* - * Converts an UTF-8 sequence to a wide string. Same conventions as the - * previous function. - */ -static int ntfs_duputf82uni(unsigned char* in, int in_len, ntfs_u16** out, - int *out_len) -{ - int i, tmp; - int len16; - ntfs_u16* result; - ntfs_u16 wtmp; - - for (i = len16 = 0; i < in_len; i += tmp, len16++) { - tmp = from_utf8(in + i, &wtmp); - if (!tmp) - return -EILSEQ; - } - *out = result = ntfs_malloc(2 * (len16 + 1)); - if (!result) - return -ENOMEM; - result[len16] = 0; - *out_len = len16; - for (i = len16 = 0; i < in_len; i += tmp, len16++) { - tmp = from_utf8(in + i, &wtmp); - NTFS_PUTU16(result + len16, wtmp); - } - return 0; -} - -/* Encodings dispatchers. 
*/ -int ntfs_encodeuni(ntfs_volume *vol, ntfs_u16 *in, int in_len, char **out, - int *out_len) -{ - if (vol->nls_map) - return ntfs_dupuni2map(vol, in, in_len, out, out_len); - else - return ntfs_dupuni2utf8(in, in_len, out, out_len); -} - -int ntfs_decodeuni(ntfs_volume *vol, char *in, int in_len, ntfs_u16 **out, - int *out_len) -{ - if (vol->nls_map) - return ntfs_dupmap2uni(vol, in, in_len, out, out_len); - else - return ntfs_duputf82uni(in, in_len, out, out_len); -} - -/* Same address space copies. */ -void ntfs_put(ntfs_io *dest, void *src, ntfs_size_t n) -{ - ntfs_memcpy(dest->param, src, n); - ((char*)dest->param) += n; -} - -void ntfs_get(void* dest, ntfs_io *src, ntfs_size_t n) -{ - ntfs_memcpy(dest, src->param, n); - ((char*)src->param) += n; -} - -void *ntfs_calloc(int size) -{ - void *result = ntfs_malloc(size); - if (result) - ntfs_bzero(result, size); - return result; -} - -/* Copy len ascii characters from from to to. :) */ -void ntfs_ascii2uni(short int *to, char *from, int len) -{ - int i; - - for (i = 0; i < len; i++) - NTFS_PUTU16(to + i, from[i]); - to[i] = 0; -} - -/* strncmp for Unicode strings. */ -int ntfs_uni_strncmp(short int* a, short int *b, int n) -{ - int i; - - for(i = 0; i < n; i++) - { - if (NTFS_GETU16(a + i) < NTFS_GETU16(b + i)) - return -1; - if (NTFS_GETU16(b + i) < NTFS_GETU16(a + i)) - return 1; - if (NTFS_GETU16(a + i) == 0) - break; - } - return 0; -} - -/* strncmp between Unicode and ASCII strings. */ -int ntfs_ua_strncmp(short int* a, char* b, int n) -{ - int i; - - for (i = 0; i < n; i++) { - if(NTFS_GETU16(a + i) < b[i]) - return -1; - if(b[i] < NTFS_GETU16(a + i)) - return 1; - if (b[i] == 0) - return 0; - } - return 0; -} - -#define NTFS_TIME_OFFSET ((ntfs_time64_t)(369*365 + 89) * 24 * 3600 * 10000000) - -/* Convert the NT UTC (based 1.1.1601, in hundred nanosecond units) - * into Unix UTC (based 1.1.1970, in seconds). 
*/ -ntfs_time_t ntfs_ntutc2unixutc(ntfs_time64_t ntutc) -{ - /* Subtract the NTFS time offset, then convert to 1s intervals. */ - ntfs_time64_t t = ntutc - NTFS_TIME_OFFSET; - do_div(t, 10000000); - return (ntfs_time_t)t; -} - -/* Convert the Unix UTC into NT UTC. */ -ntfs_time64_t ntfs_unixutc2ntutc(ntfs_time_t t) -{ - /* Convert to 100ns intervals and then add the NTFS time offset. */ - return (ntfs_time64_t)t * 10000000 + NTFS_TIME_OFFSET; -} - -#undef NTFS_TIME_OFFSET - -/* Fill index name. */ -void ntfs_indexname(char *buf, int type) -{ - char hex[] = "0123456789ABCDEF"; - int index; - *buf++ = '$'; - *buf++ = 'I'; - for (index = 24; index > 0; index -= 4) - if ((0xF << index) & type) - break; - while (index >= 0) { - *buf++ = hex[(type >> index) & 0xF]; - index -= 4; - } - *buf = '\0'; -} - diff -urN linux-2.4.24-vanilla/fs/ntfs/util.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/util.h --- linux-2.4.24-vanilla/fs/ntfs/util.h 2001-09-08 20:24:40.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/util.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,56 +0,0 @@ -/* - * util.h - Header file for util.c - * - * Copyright (C) 1997 Régis Duchesne - * Copyright (C) 2001 Anton Altaparmakov (AIA) - */ - -/* The first 16 inodes correspond to NTFS special files. 
*/ -typedef enum { - FILE_Mft = 0, - FILE_MftMirr = 1, - FILE_LogFile = 2, - FILE_Volume = 3, - FILE_AttrDef = 4, - FILE_root = 5, - FILE_BitMap = 6, - FILE_Boot = 7, - FILE_BadClus = 8, - FILE_Secure = 9, - FILE_UpCase = 10, - FILE_Extend = 11, - FILE_Reserved12 = 12, - FILE_Reserved13 = 13, - FILE_Reserved14 = 14, - FILE_Reserved15 = 15, -} NTFS_SYSTEM_FILES; - -/* Memory management */ -void *ntfs_calloc(int size); - -/* String operations */ -/* Copy Unicode <-> ASCII */ -void ntfs_ascii2uni(short int *to, char *from, int len); - -/* Comparison */ -int ntfs_uni_strncmp(short int* a, short int *b, int n); -int ntfs_ua_strncmp(short int* a, char* b, int n); - -/* Same address space copies */ -void ntfs_put(ntfs_io *dest, void *src, ntfs_size_t n); -void ntfs_get(void* dest, ntfs_io *src, ntfs_size_t n); - -/* Charset conversion */ -int ntfs_encodeuni(ntfs_volume *vol, ntfs_u16 *in, int in_len, char **out, - int *out_len); -int ntfs_decodeuni(ntfs_volume *vol, char *in, int in_len, ntfs_u16 **out, - int *out_len); - -/* Time conversion */ -/* NT <-> Unix */ -ntfs_time_t ntfs_ntutc2unixutc(ntfs_time64_t ntutc); -ntfs_time64_t ntfs_unixutc2ntutc(ntfs_time_t t); - -/* Attribute names */ -void ntfs_indexname(char *buf, int type); - diff -urN linux-2.4.24-vanilla/fs/ntfs/volume.h linux-2.4.24-ntfs-2.1.6a/fs/ntfs/volume.h --- linux-2.4.24-vanilla/fs/ntfs/volume.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/fs/ntfs/volume.h 2004-01-21 14:28:25.000000000 +0000 @@ -0,0 +1,137 @@ +/* + * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part + * of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. + * Copyright (c) 2002 Richard Russon. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_VOLUME_H +#define _LINUX_NTFS_VOLUME_H + +#include "types.h" + +/* + * The NTFS in memory super block structure. + */ +typedef struct { + /* + * FIXME: Reorder to have commonly used together elements within the + * same cache line, aiming at a cache line size of 32 bytes. Aim for + * 64 bytes for less commonly used together elements. Put most commonly + * used elements to front of structure. Obviously do this only when the + * structure has stabilized... (AIA) + */ + /* Device specifics. */ + struct super_block *sb; /* Pointer back to the super_block, + so we don't have to get the offset + every time. */ + struct semaphore inode_lock; /* Needed to close iget4() race. */ + LCN nr_blocks; /* Number of NTFS_BLOCK_SIZE bytes + sized blocks on the device. */ + /* Configuration provided by user at mount time. */ + unsigned long flags; /* Miscellaneous flags, see below. */ + uid_t uid; /* uid that files will be mounted as. */ + gid_t gid; /* gid that files will be mounted as. */ + mode_t fmask; /* The mask for file permissions. */ + mode_t dmask; /* The mask for directory + permissions. */ + u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ + u8 on_errors; /* What to do on file system errors. */ + /* NTFS bootsector provided information. 
*/ + u16 sector_size; /* in bytes */ + u8 sector_size_bits; /* log2(sector_size) */ + u32 cluster_size; /* in bytes */ + u32 cluster_size_mask; /* cluster_size - 1 */ + u8 cluster_size_bits; /* log2(cluster_size) */ + u32 mft_record_size; /* in bytes */ + u32 mft_record_size_mask; /* mft_record_size - 1 */ + u8 mft_record_size_bits; /* log2(mft_record_size) */ + u32 index_record_size; /* in bytes */ + u32 index_record_size_mask; /* index_record_size - 1 */ + u8 index_record_size_bits; /* log2(index_record_size) */ + LCN nr_clusters; /* Volume size in clusters == number of + bits in lcn bitmap. */ + LCN mft_lcn; /* Cluster location of mft data. */ + LCN mftmirr_lcn; /* Cluster location of copy of mft. */ + u64 serial_no; /* The volume serial number. */ + /* Mount specific NTFS information. */ + u32 upcase_len; /* Number of entries in upcase[]. */ + uchar_t *upcase; /* The upcase table. */ + LCN mft_zone_start; /* First cluster of the mft zone. */ + LCN mft_zone_end; /* First cluster beyond the mft zone. */ + struct inode *mft_ino; /* The VFS inode of $MFT. */ + + struct inode *mftbmp_ino; /* Attribute inode for $MFT/$BITMAP. */ + struct rw_semaphore mftbmp_lock; /* Lock for serializing accesses to the + mft record bitmap ($MFT/$BITMAP). */ + unsigned long nr_mft_records; /* Number of mft records == number of + bits in mft bitmap. */ + + struct inode *mftmirr_ino; /* The VFS inode of $MFTMirr. */ + struct inode *lcnbmp_ino; /* The VFS inode of $Bitmap. */ + struct rw_semaphore lcnbmp_lock; /* Lock for serializing accesses to the + cluster bitmap ($Bitmap/$DATA). */ + struct inode *vol_ino; /* The VFS inode of $Volume. */ + unsigned long vol_flags; /* Volume flags (VOLUME_*). */ + u8 major_ver; /* Ntfs major version of volume. */ + u8 minor_ver; /* Ntfs minor version of volume. */ + struct inode *root_ino; /* The VFS inode of the root + directory. */ + struct inode *secure_ino; /* The VFS inode of $Secure (NTFS3.0+ + only, otherwise NULL). 
*/ + struct nls_table *nls_map; +} ntfs_volume; + +/* + * Defined bits for the flags field in the ntfs_volume structure. + */ +typedef enum { + NV_Errors, /* 1: Volume has errors, prevent remount rw. */ + NV_ShowSystemFiles, /* 1: Return system files in ntfs_readdir(). */ + NV_CaseSensitive, /* 1: Treat file names as case sensitive and + create filenames in the POSIX namespace. + Otherwise be case insensitive and create + file names in WIN32 namespace. */ +} ntfs_volume_flags; + +/* + * Macro tricks to expand the NVolFoo(), NVolSetFoo(), and NVolClearFoo() + * functions. + */ +#define NVOL_FNS(flag) \ +static inline int NVol##flag(ntfs_volume *vol) \ +{ \ + return test_bit(NV_##flag, &(vol)->flags); \ +} \ +static inline void NVolSet##flag(ntfs_volume *vol) \ +{ \ + set_bit(NV_##flag, &(vol)->flags); \ +} \ +static inline void NVolClear##flag(ntfs_volume *vol) \ +{ \ + clear_bit(NV_##flag, &(vol)->flags); \ +} + +/* Emit the ntfs volume bitops functions. */ +NVOL_FNS(Errors) +NVOL_FNS(ShowSystemFiles) +NVOL_FNS(CaseSensitive) + +#endif /* _LINUX_NTFS_VOLUME_H */ + diff -urN linux-2.4.24-vanilla/include/asm-i386/kmap_types.h linux-2.4.24-ntfs-2.1.6a/include/asm-i386/kmap_types.h --- linux-2.4.24-vanilla/include/asm-i386/kmap_types.h 2003-08-25 12:44:43.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/include/asm-i386/kmap_types.h 2004-01-21 14:28:25.000000000 +0000 @@ -8,6 +8,7 @@ KM_USER0, KM_USER1, KM_BH_IRQ, + KM_BIO_IRQ, KM_SOFTIRQ0, KM_SOFTIRQ1, KM_TYPE_NR diff -urN linux-2.4.24-vanilla/include/asm-ppc/kmap_types.h linux-2.4.24-ntfs-2.1.6a/include/asm-ppc/kmap_types.h --- linux-2.4.24-vanilla/include/asm-ppc/kmap_types.h 2003-08-25 12:44:44.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/include/asm-ppc/kmap_types.h 2004-01-21 14:28:25.000000000 +0000 @@ -9,6 +9,7 @@ KM_USER0, KM_USER1, KM_BH_IRQ, + KM_BIO_IRQ, KM_SOFTIRQ0, KM_SOFTIRQ1, KM_TYPE_NR diff -urN linux-2.4.24-vanilla/include/asm-sparc/kmap_types.h linux-2.4.24-ntfs-2.1.6a/include/asm-sparc/kmap_types.h --- 
linux-2.4.24-vanilla/include/asm-sparc/kmap_types.h 2003-08-25 12:44:44.000000000 +0100 +++ linux-2.4.24-ntfs-2.1.6a/include/asm-sparc/kmap_types.h 2004-01-21 14:28:25.000000000 +0000 @@ -8,6 +8,7 @@ KM_USER0, KM_USER1, KM_BH_IRQ, + KM_BIO_IRQ, KM_SOFTIRQ0, KM_SOFTIRQ1, KM_TYPE_NR diff -urN linux-2.4.24-vanilla/include/linux/fs.h linux-2.4.24-ntfs-2.1.6a/include/linux/fs.h --- linux-2.4.24-vanilla/include/linux/fs.h 2003-11-28 18:26:21.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/include/linux/fs.h 2004-01-21 14:31:28.000000000 +0000 @@ -711,7 +711,6 @@ #include <linux/ext2_fs_sb.h> #include <linux/ext3_fs_sb.h> #include <linux/hpfs_fs_sb.h> -#include <linux/ntfs_fs_sb.h> #include <linux/msdos_fs_sb.h> #include <linux/iso_fs_sb.h> #include <linux/nfs_fs_sb.h> @@ -769,7 +768,6 @@ struct ext2_sb_info ext2_sb; struct ext3_sb_info ext3_sb; struct hpfs_sb_info hpfs_sb; - struct ntfs_sb_info ntfs_sb; struct msdos_sb_info msdos_sb; struct isofs_sb_info isofs_sb; struct nfs_sb_info nfs_sb; diff -urN linux-2.4.24-vanilla/include/linux/ntfs_fs.h linux-2.4.24-ntfs-2.1.6a/include/linux/ntfs_fs.h --- linux-2.4.24-vanilla/include/linux/ntfs_fs.h 2001-11-04 00:35:46.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/include/linux/ntfs_fs.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,29 +0,0 @@ -#ifndef _LINUX_NTFS_FS_H -#define _LINUX_NTFS_FS_H - -#include - -#define NTFS_SECTOR_BITS 9 -#define NTFS_SECTOR_SIZE 512 - -/* - * Attribute flags (16-bit). - */ -typedef enum { - ATTR_IS_COMPRESSED = __constant_cpu_to_le16(0x0001), - ATTR_COMPRESSION_MASK = __constant_cpu_to_le16(0x00ff), - /* Compression method mask. Also, - * first illegal value. */ - ATTR_IS_ENCRYPTED = __constant_cpu_to_le16(0x4000), - ATTR_IS_SPARSE = __constant_cpu_to_le16(0x8000), -} __attribute__ ((__packed__)) ATTR_FLAGS; - -/* - * The two zones from which to allocate clusters. 
- */ -typedef enum { - MFT_ZONE, - DATA_ZONE -} NTFS_CLUSTER_ALLOCATION_ZONES; - -#endif diff -urN linux-2.4.24-vanilla/include/linux/ntfs_fs_i.h linux-2.4.24-ntfs-2.1.6a/include/linux/ntfs_fs_i.h --- linux-2.4.24-vanilla/include/linux/ntfs_fs_i.h 2001-11-22 19:46:18.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/include/linux/ntfs_fs_i.h 2004-01-21 14:28:25.000000000 +0000 @@ -1,91 +1,110 @@ #ifndef _LINUX_NTFS_FS_I_H #define _LINUX_NTFS_FS_I_H -#include +typedef enum { + NTFS_AT_END = 0xffffffff +} NTFS_ATTR_TYPES; + +typedef struct { + void *rl; + struct rw_semaphore lock; +} ntfs_run_list; -/* Forward declarations, to keep number of mutual includes low */ -struct ntfs_attribute; -struct ntfs_sb_info; - -/* Duplicate definitions from ntfs/ntfstypes.h */ -#ifndef NTFS_INTEGRAL_TYPES -#define NTFS_INTEGRAL_TYPES -typedef u8 ntfs_u8; -typedef u16 ntfs_u16; -typedef u32 ntfs_u32; -typedef u64 ntfs_u64; -typedef s8 ntfs_s8; -typedef s16 ntfs_s16; -typedef s32 ntfs_s32; -typedef s64 ntfs_s64; -#endif - -#ifndef NTMODE_T -#define NTMODE_T -typedef __kernel_mode_t ntmode_t; -#endif -#ifndef NTFS_UID_T -#define NTFS_UID_T -typedef uid_t ntfs_uid_t; -#endif -#ifndef NTFS_GID_T -#define NTFS_GID_T -typedef gid_t ntfs_gid_t; -#endif -#ifndef NTFS_SIZE_T -#define NTFS_SIZE_T -typedef __kernel_size_t ntfs_size_t; -#endif -#ifndef NTFS_TIME_T -#define NTFS_TIME_T -typedef __kernel_time_t ntfs_time_t; -#endif +struct inode; -/* unicode character type */ -#ifndef NTFS_WCHAR_T -#define NTFS_WCHAR_T -typedef u16 ntfs_wchar_t; -#endif -/* file offset */ -#ifndef NTFS_OFFSET_T -#define NTFS_OFFSET_T -typedef s64 ntfs_offset_t; -#endif -/* UTC */ -#ifndef NTFS_TIME64_T -#define NTFS_TIME64_T -typedef u64 ntfs_time64_t; -#endif -/* - * This is really signed long long. So we support only volumes up to 2Tb. This - * is ok as Win2k also only uses 32-bits to store clusters. - * Whatever you do keep this a SIGNED value or a lot of NTFS users with - * corrupted filesystems will lynch you! 
It causes massive fs corruption when - * unsigned due to the nature of many checks relying on being performed on - * signed quantities. (AIA) - */ -#ifndef NTFS_CLUSTER_T -#define NTFS_CLUSTER_T -typedef s32 ntfs_cluster_t; -#endif - -/* Definition of the NTFS in-memory inode structure. */ struct ntfs_inode_info { - struct ntfs_sb_info *vol; - unsigned long i_number; /* Should be really 48 bits. */ - __u16 sequence_number; /* The current sequence number. */ - unsigned char *attr; /* Array of the attributes. */ - int attr_count; /* Size of attrs[]. */ - struct ntfs_attribute *attrs; - int record_count; /* Size of records[]. */ - int *records; /* Array of the record numbers of the $Mft whose - * attributes have been inserted in the inode. */ + s64 initialized_size; /* Copy from $DATA/$INDEX_ALLOCATION. */ + s64 allocated_size; /* Copy from $DATA/$INDEX_ALLOCATION. */ + unsigned long state; /* NTFS specific flags describing this inode. + See ntfs_inode_state_bits below. */ + unsigned long mft_no; /* Number of the mft record / inode. */ + u16 seq_no; /* Sequence number of the mft record. */ + atomic_t count; /* Inode reference count for book keeping. */ + void *vol; /* Pointer to the ntfs volume of this inode. */ + /* + * If NInoAttr() is true, the below fields describe the attribute which + * this fake inode belongs to. The actual inode of this attribute is + * pointed to by base_ntfs_ino and nr_extents is always set to -1 (see + * below). For real inodes, we also set the type (AT_DATA for files and + * AT_INDEX_ALLOCATION for directories), with the name = NULL and + * name_len = 0 for files and name = I30 (global constant) and + * name_len = 4 for directories. + */ + NTFS_ATTR_TYPES type; /* Attribute type of this fake inode. */ + void *name; /* Attribute name of this fake inode. */ + u32 name_len; /* Attribute name length of this fake inode. 
*/ + ntfs_run_list run_list; /* If state has the NI_NonResident bit set, + the run list of the unnamed data attribute + (if a file) or of the index allocation + attribute (directory) or of the attribute + described by the fake inode (if NInoAttr()). + If run_list.rl is NULL, the run list has not + been read in yet or has been unmapped. If + NI_NonResident is clear, the attribute is + resident (file and fake inode) or there is + no $I30 index allocation attribute + (small directory). In the latter case + run_list.rl is always NULL.*/ + /* + * The following fields are only valid for real inodes and extent + * inodes. + */ + struct semaphore mrec_lock; /* Lock for serializing access to the + mft record belonging to this inode. */ + struct page *page; /* The page containing the mft record of the + inode. This should only be touched by the + (un)map_mft_record*() functions. */ + int page_ofs; /* Offset into the page at which the mft record + begins. This should only be touched by the + (un)map_mft_record*() functions. */ + /* + * Attribute list support (only for use by the attribute lookup + * functions). Setup during read_inode for all inodes with attribute + * lists. Only valid if NI_AttrList is set in state, and attr_list_rl is + * further only valid if NI_AttrListNonResident is set. + */ + u32 attr_list_size; /* Length of attribute list value in bytes. */ + u8 *attr_list; /* Attribute list value itself. */ + ntfs_run_list attr_list_rl; /* Run list for the attribute list value. */ union { - struct { - int recordsize; - int clusters_per_record; - } index; - } u; + struct { /* It is a directory or $MFT. */ + struct inode *bmp_ino; /* Attribute inode for the + directory index $BITMAP. */ + u32 index_block_size; /* Size of an index block. */ + u32 index_vcn_size; /* Size of a vcn in this + directory index. */ + u8 index_block_size_bits; /* Log2 of the above. */ + u8 index_vcn_size_bits; /* Log2 of the above. */ + } m; + struct { /* It is a compressed file or fake inode. 
*/ + s64 compressed_size; /* Copy from $DATA. */ + u32 compression_block_size; /* Size of a compression + block (cb). */ + u8 compression_block_size_bits; /* Log2 of the size of + a cb. */ + u8 compression_block_clusters; /* Number of clusters + per compression + block. */ + } f; + } c; + struct semaphore extent_lock; /* Lock for accessing/modifying the + fields below. */ + s32 nr_extents; /* For a base mft record, the number of attached extent + inodes (0 if none), for extent records and for fake + inodes describing an attribute this is -1. */ + union { /* This union is only used if nr_extents != 0. */ + void **extent_ntfs_inos; /* For nr_extents > 0, array of + the ntfs inodes of the extent + mft records belonging to + this base inode which have + been loaded. */ + void *base_ntfs_ino; /* For nr_extents == -1, the + ntfs inode of the base mft + record. For fake inodes, the + real (base) inode to which + the attribute belongs. */ + } e; }; #endif + diff -urN linux-2.4.24-vanilla/include/linux/ntfs_fs_sb.h linux-2.4.24-ntfs-2.1.6a/include/linux/ntfs_fs_sb.h --- linux-2.4.24-vanilla/include/linux/ntfs_fs_sb.h 2001-11-22 19:46:18.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/include/linux/ntfs_fs_sb.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,61 +0,0 @@ -#ifndef _LINUX_NTFS_FS_SB_H -#define _LINUX_NTFS_FS_SB_H - -#include - -struct ntfs_sb_info{ - /* Configuration provided by user at mount time. */ - ntfs_uid_t uid; - ntfs_gid_t gid; - ntmode_t umask; - void *nls_map; - unsigned int ngt; - char mft_zone_multiplier; - unsigned long mft_data_pos; - ntfs_cluster_t mft_zone_pos; - ntfs_cluster_t mft_zone_start; - ntfs_cluster_t mft_zone_end; - ntfs_cluster_t data1_zone_pos; - ntfs_cluster_t data2_zone_pos; - /* Configuration provided by user with the ntfstools. - * FIXME: This is no longer possible. What is this good for? (AIA) */ - ntfs_size_t partition_bias; /* For access to underlying device. */ - /* Attribute definitions. 
*/ - ntfs_u32 at_standard_information; - ntfs_u32 at_attribute_list; - ntfs_u32 at_file_name; - ntfs_u32 at_volume_version; - ntfs_u32 at_security_descriptor; - ntfs_u32 at_volume_name; - ntfs_u32 at_volume_information; - ntfs_u32 at_data; - ntfs_u32 at_index_root; - ntfs_u32 at_index_allocation; - ntfs_u32 at_bitmap; - ntfs_u32 at_symlink; /* aka SYMBOLIC_LINK or REPARSE_POINT */ - /* Data read / calculated from the boot file. */ - int sector_size; - int cluster_size; - int cluster_size_bits; - int mft_clusters_per_record; - int mft_record_size; - int mft_record_size_bits; - int index_clusters_per_record; - int index_record_size; - int index_record_size_bits; - ntfs_cluster_t nr_clusters; - ntfs_cluster_t mft_lcn; - ntfs_cluster_t mft_mirr_lcn; - /* Data read from special files. */ - unsigned char *mft; - unsigned short *upcase; - unsigned int upcase_length; - /* Inodes we always hold onto. */ - struct ntfs_inode_info *mft_ino; - struct ntfs_inode_info *mftmirr; - struct ntfs_inode_info *bitmap; - struct super_block *sb; - unsigned char ino_flags; -}; - -#endif diff -urN linux-2.4.24-vanilla/kernel/ksyms.c linux-2.4.24-ntfs-2.1.6a/kernel/ksyms.c --- linux-2.4.24-vanilla/kernel/ksyms.c 2003-11-28 18:26:21.000000000 +0000 +++ linux-2.4.24-ntfs-2.1.6a/kernel/ksyms.c 2004-01-21 14:29:36.000000000 +0000 @@ -172,7 +172,9 @@ EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(__d_path); EXPORT_SYMBOL(mark_buffer_dirty); -EXPORT_SYMBOL(set_buffer_async_io); /* for reiserfs_writepage */ +EXPORT_SYMBOL(set_buffer_async_io); +EXPORT_SYMBOL(end_buffer_io_sync); +EXPORT_SYMBOL(__mark_dirty); EXPORT_SYMBOL(__mark_buffer_dirty); EXPORT_SYMBOL(__mark_inode_dirty); EXPORT_SYMBOL(fd_install);