Compare commits

..

2 Commits

Author SHA1 Message Date
7db92bab06 2.3.15 2020-12-09 05:02:15 -07:00
b97c97941f suggest @root/walk 2020-12-09 05:01:29 -07:00
2 changed files with 136 additions and 139 deletions

270
README.md
View File

@ -1,60 +1,66 @@
node-walk # 2021 Update
====
| Sponsored by [ppl](https://ppl.family) Consider using [`@root/walk`](https://npmjs.org/package/@root/walk) instead.
I created `walk` quite literally a decade ago, in the Node v0.x days.
Back then using an EventEmitter seemed like the thing to do. Nowadays,
it seems a bit overkill for the simple task of walking over directories.
There's nothing wrong with `walk` - it's about the same as it was 10 years ago -
however, at only 50 lines of code long, `@root/walk` is much simpler and much faster.
# node-walk
| a [Root](https://rootprojects.org) project
nodejs walk implementation. nodejs walk implementation.
This is somewhat of a port of Python's `os.walk`, but using Node.JS conventions. This is somewhat of a port of Python's `os.walk`, but using Node.JS conventions.
* EventEmitter - EventEmitter
* Asynchronous - Asynchronous
* Chronological (optionally) - Chronological (optionally)
* Built-in flow-control - Built-in flow-control
* includes Synchronous version (same API as Asynchronous) - includes Synchronous version (same API as Asynchronous)
As few file descriptors are opened at a time as possible. As few file descriptors are opened at a time as possible.
This is particularly well suited for single hard disks which are not flash or solid state. This is particularly well suited for single hard disks which are not flash or solid state.
Installation ## Installation
----
```bash ```bash
npm install --save walk npm install --save walk
``` ```
Getting Started # Getting Started
====
```javascript ```javascript
(function () { 'use strict';
"use strict";
var walk = require('walk'); var walk = require('walk');
var fs = require('fs'); var fs = require('fs');
var walker; var walker;
var options = {};
walker = walk.walk("/tmp", options); walker = walk.walk('/tmp', options);
walker.on("file", function (root, fileStats, next) { walker.on('file', function (root, fileStats, next) {
fs.readFile(fileStats.name, function () { fs.readFile(fileStats.name, function () {
// doStuff // doStuff
next();
});
});
walker.on("errors", function (root, nodeStatsArray, next) {
next(); next();
}); });
});
walker.on("end", function () { walker.on('errors', function (root, nodeStatsArray, next) {
console.log("all done"); next();
}); });
}());
walker.on('end', function () {
console.log('all done');
});
``` ```
Common Events ## Common Events
-----
All single event callbacks are in the form of `function (root, stat, next) {}`. All single event callbacks are in the form of `function (root, stat, next) {}`.
@ -63,16 +69,15 @@ All multiple event callbacks are in the form of `function (root, stats
All **error** event callbacks are in the form `function (root, stat/stats, next) {}`. All **error** event callbacks are in the form `function (root, stat/stats, next) {}`.
**`stat.error`** contains the error. **`stat.error`** contains the error.
* `names` - `names`
* `directory` - `directory`
* `directories` - `directories`
* `file` - `file`
* `files` - `files`
* `end` - `end`
* `nodeError` (`stat` failed) - `nodeError` (`stat` failed)
* `directoryError` (`stat` succeeded, but `readdir` failed) - `directoryError` (`stat` succeeded, but `readdir` failed)
* `errors` (a collection of any errors encountered) - `errors` (a collection of any errors encountered)
A typical `stat` event looks like this: A typical `stat` event looks like this:
@ -95,63 +100,60 @@ A typical `stat` event looks like this:
type: 'file' } type: 'file' }
``` ```
Advanced Example # Advanced Example
====
Both Asynchronous and Synchronous versions are provided. Both Asynchronous and Synchronous versions are provided.
```javascript ```javascript
(function () { 'use strict';
"use strict";
var walk = require('walk'); var walk = require('walk');
var fs = require('fs'); var fs = require('fs');
var options; var options;
var walker; var walker;
options = { options = {
followLinks: false followLinks: false,
// directories with these keys will be skipped // directories with these keys will be skipped
, filters: ["Temp", "_Temp"] filters: ['Temp', '_Temp'],
}; };
walker = walk.walk("/tmp", options); walker = walk.walk('/tmp', options);
// OR // OR
// walker = walk.walkSync("/tmp", options); // walker = walk.walkSync("/tmp", options);
walker.on("names", function (root, nodeNamesArray) { walker.on('names', function (root, nodeNamesArray) {
nodeNamesArray.sort(function (a, b) { nodeNamesArray.sort(function (a, b) {
if (a > b) return 1; if (a > b) return 1;
if (a < b) return -1; if (a < b) return -1;
return 0; return 0;
});
}); });
});
walker.on("directories", function (root, dirStatsArray, next) { walker.on('directories', function (root, dirStatsArray, next) {
// dirStatsArray is an array of `stat` objects with the additional attributes // dirStatsArray is an array of `stat` objects with the additional attributes
// * type // * type
// * error // * error
// * name // * name
next();
});
walker.on('file', function (root, fileStats, next) {
fs.readFile(fileStats.name, function () {
// doStuff
next(); next();
}); });
});
walker.on("file", function (root, fileStats, next) { walker.on('errors', function (root, nodeStatsArray, next) {
fs.readFile(fileStats.name, function () { next();
// doStuff });
next();
});
});
walker.on("errors", function (root, nodeStatsArray, next) { walker.on('end', function () {
next(); console.log('all done');
}); });
walker.on("end", function () {
console.log("all done");
});
}());
``` ```
### Sync ### Sync
@ -166,7 +168,7 @@ I don't think I can prevent the `process.nextTick()` that `EventEmitter` calls.
```javascript ```javascript
(function () { (function () {
"use strict"; 'use strict';
var walk = require('walk'); var walk = require('walk');
var fs = require('fs'); var fs = require('fs');
@ -183,89 +185,88 @@ I don't think I can prevent the `process.nextTick()` that `EventEmitter` calls.
if (a < b) return -1; if (a < b) return -1;
return 0; return 0;
}); });
} },
, directories: function (root, dirStatsArray, next) { directories: function (root, dirStatsArray, next) {
// dirStatsArray is an array of `stat` objects with the additional attributes // dirStatsArray is an array of `stat` objects with the additional attributes
// * type // * type
// * error // * error
// * name // * name
next(); next();
} },
, file: function (root, fileStats, next) { file: function (root, fileStats, next) {
fs.readFile(fileStats.name, function () { fs.readFile(fileStats.name, function () {
// doStuff // doStuff
next(); next();
}); });
} },
, errors: function (root, nodeStatsArray, next) { errors: function (root, nodeStatsArray, next) {
next(); next();
} },
} },
}; };
walker = walk.walkSync("/tmp", options); walker = walk.walkSync('/tmp', options);
console.log("all done"); console.log('all done');
}()); })();
``` ```
API # API
====
Emitted Values Emitted Values
* `on('XYZ', function(root, stats, next) {})` - `on('XYZ', function(root, stats, next) {})`
* `root` - the directory containing the files to be inspected - `root` - the directory containing the files to be inspected
* *stats[Array]* - a single `stats` object or an array with some added attributes - _stats[Array]_ - a single `stats` object or an array with some added attributes
* type - 'file', 'directory', etc - type - 'file', 'directory', etc
* error - error
* name - the name of the file, dir, etc - name - the name of the file, dir, etc
* next - no more files will be read until this is called - next - no more files will be read until this is called
Single Events - fired immediately Single Events - fired immediately
* `end` - No files, dirs, etc left to inspect - `end` - No files, dirs, etc left to inspect
* `directoryError` - Error when `fstat` succeeded, but reading path failed (Probably due to permissions). - `directoryError` - Error when `fstat` succeeded, but reading path failed (Probably due to permissions).
* `nodeError` - Error when `fstat` did not succeed. - `nodeError` - Error when `fstat` did not succeed.
* `node` - a `stats` object for a node of any type - `node` - a `stats` object for a node of any type
* `file` - includes links when `followLinks` is `true` - `file` - includes links when `followLinks` is `true`
* `directory` - **NOTE** you could get a recursive loop if `followLinks` and a directory links to its parent - `directory` - **NOTE** you could get a recursive loop if `followLinks` and a directory links to its parent
* `symbolicLink` - always empty when `followLinks` is `true` - `symbolicLink` - always empty when `followLinks` is `true`
* `blockDevice` - `blockDevice`
* `characterDevice` - `characterDevice`
* `FIFO` - `FIFO`
* `socket` - `socket`
Events with Array Arguments - fired after all files in the dir have been `stat`ed Events with Array Arguments - fired after all files in the dir have been `stat`ed
* `names` - before any `stat` takes place. Useful for sorting and filtering. - `names` - before any `stat` takes place. Useful for sorting and filtering.
* Note: the array is an array of `string`s, not `stat` objects
* Note: the `next` argument is a `noop`
* `errors` - errors encountered by `fs.stat` when reading nodes in a directory - Note: the array is an array of `string`s, not `stat` objects
* `nodes` - an array of `stats` of any type - Note: the `next` argument is a `noop`
* `files`
* `directories` - modification of this array - sorting, removing, etc - affects traversal - `errors` - errors encountered by `fs.stat` when reading nodes in a directory
* `symbolicLinks` - `nodes` - an array of `stats` of any type
* `blockDevices` - `files`
* `characterDevices` - `directories` - modification of this array - sorting, removing, etc - affects traversal
* `FIFOs` - `symbolicLinks`
* `sockets` - `blockDevices`
- `characterDevices`
- `FIFOs`
- `sockets`
**Warning** beware of infinite loops when `followLinks` is true (using `walk-recurse` variant). **Warning** beware of infinite loops when `followLinks` is true (using `walk-recurse` variant).
Comparisons # Comparisons
====
Tested on my `/System` containing 59,490 (+ self) directories (and lots of files). Tested on my `/System` containing 59,490 (+ self) directories (and lots of files).
The size of the text output was 6mb. The size of the text output was 6mb.
`find`: `find`:
time bash -c "find /System -type d | wc" time bash -c "find /System -type d | wc"
59491 97935 6262916 59491 97935 6262916
real 2m27.114s real 2m27.114s
user 0m1.193s user 0m1.193s
@ -295,12 +296,11 @@ Note that `find.js` omits the start directory
In conclusion node.js asynchronous walk is much slower than regular "find". In conclusion node.js asynchronous walk is much slower than regular "find".
LICENSE # LICENSE
===
`node-walk` is available under the following licenses: `node-walk` is available under the following licenses:
* MIT - MIT
* Apache 2 - Apache 2
Copyright 2011 - Present AJ ONeal Copyright 2011 - Present AJ ONeal

View File

@ -17,7 +17,7 @@
}, },
"lib": ".", "lib": ".",
"main": "./lib/walk.js", "main": "./lib/walk.js",
"version": "2.3.14", "version": "2.3.15",
"files": [ "files": [
"lib" "lib"
], ],
@ -33,9 +33,6 @@
"example": "examples", "example": "examples",
"test": "test" "test": "test"
}, },
"files": [
"lib"
],
"devDependencies": {}, "devDependencies": {},
"scripts": { "scripts": {
"test": "./test/walk-test.sh" "test": "./test/walk-test.sh"