module DirElements (dirElem) where
import System.Directory (getDirectoryContents, doesDirectoryExist)
import System.FilePath ((</>))
-- | List the contents of a directory tree level by level (breadth-first).
--
-- Starting from the single-element level @[dirPath]@, every following level
-- is produced by concatenating 'getDirectoryContents'' over all paths of the
-- previous level; iteration stops at the first empty level.  The initial
-- level (the root path itself) is dropped from the result.
dirElem :: FilePath -> IO [[FilePath]]
dirElem dirPath = do
  levels <- iterateM (not . null) (concatMapM getDirectoryContents') [dirPath]
  return (tail levels)
-- | Full paths of the entries directly inside @dirPath@.
--
-- Returns an empty list when @dirPath@ is not an existing directory (for
-- example, when it names a regular file).  Every returned entry is prefixed
-- with @dirPath@.
getDirectoryContents' :: FilePath -> IO [FilePath]
getDirectoryContents' dirPath = do
  isDir <- doesDirectoryExist dirPath
  if isDir then dirContent else return []
  where
    dirContent = do
      contents <- getDirectoryContents dirPath
      -- Remove the special entries by name: 'getDirectoryContents' does not
      -- promise that "." and ".." are the first two results, so discarding
      -- them positionally with @tail . tail@ could drop real entries instead.
      return (map (dirPath </>) (filter (`notElem` [".", ".."]) contents))
-- | Monadic analogue of 'iterate' that stops once the predicate fails.
--
-- @iterateM fb f x@ yields @x@, then the result of @f x@, then the result of
-- applying @f@ to that value, and so on, for as long as @fb@ accepts the
-- current value.  The first rejected value is not part of the result, and
-- @f@ is never run on a rejected value.
--
-- Notice: because of the way '>>=' sequences effects, this cannot be written
-- like 'iterate' (which yields an infinite list); hence the stop predicate.
iterateM :: Monad m => (a -> Bool) -> (a -> m a) -> a -> m [a]
iterateM fb f x
  | fb x = do
      next <- f x
      rest <- iterateM fb f next
      return (x : rest)
  | otherwise = return []
-- | Apply a monadic, list-producing function to every element and flatten
-- the per-element results into a single list, preserving their order.
concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM f list = do
  chunks <- mapM f list
  return (concat chunks)
It works fine, but when run on a large directory it “pauses” for a while and then prints all the results at once.
After some research, I found that this is the same problem as with `sequence $ map return [1..] :: [[Int]]`; see “Why the Haskell sequence function can’t be lazy or why recursive monadic functions can’t be lazy”.
It uses StateP to keep the queue of untraversed directories so that it can do breadth-first traversal. For convenience, it uses MaybeP to exit the loop.
import Control.Monad
import Control.Proxy
import Control.Proxy.Trans.Maybe
import Control.Proxy.Trans.State as S
import Data.Sequence hiding (filter)
import System.FilePath.Posix
import System.Directory
-- | Names of the entries in @path@, without the special entries @"."@ and
-- @".."@.  The names are returned as 'getDirectoryContents' gives them,
-- i.e. they are not prefixed with @path@.
getUsefulContents :: FilePath -> IO [FilePath]
getUsefulContents path =
  fmap (filter (`notElem` [".", ".."])) (getDirectoryContents path)
-- | Breadth-first producer of the paths below (and including) @path@.
--
-- The 'StateP' layer holds a 'Seq' used as a FIFO queue of paths that have
-- not been visited yet; the 'MaybeP' layer is used only to leave the
-- 'forever' loop (via 'mzero') once that queue is empty.
traverseTree
    :: (Proxy p)
    => FilePath
    -> () -> Producer (MaybeP (StateP (Seq FilePath) p)) FilePath IO r
traverseTree path () = do
    -- Seed the queue with the root path.
    liftP $ S.modify (|> path)
    forever $ do
        x <- liftP $ S.gets viewl
        case x of
            -- Nothing left to visit: terminate the producer.
            EmptyL    -> mzero
            file :< s -> do
                liftP $ S.put s
                -- Hand the path downstream before looking inside it.
                respond file
                isDir <- lift $ doesDirectoryExist file
                when isDir $ do
                    names <- lift $ getUsefulContents file
                    let namesfull = map (file </>) names
                    -- Append the children at the back of the queue, which
                    -- is what makes the traversal breadth-first.
                    liftP $ forM_ namesfull $ \name ->
                        S.modify (|> name)
This defines a lazy, breadth-first generator of file paths. If you connect it to a printing stage, it prints each path as the tree is being traversed:
-- Print every path found under "/tmp", in breadth-first order, as soon as
-- the traversal produces it.  The state layer starts with an empty queue.
main = runProxy $ evalStateK empty $ runMaybeK $
    traverseTree "/tmp" >-> putStrLnD
Laziness means that if you only need 3 files, it will traverse only as much of the tree as is needed to generate those three files, and then it will stop:
-- Same pipeline as the plain printing example, but 'takeB_ 3' lets only
-- three paths through, so the traversal stops after the third one.
main = runProxy $ evalStateK empty $ runMaybeK $
    traverseTree "/tmp" >-> takeB_ 3 >-> putStrLnD
If you want to learn more about the pipes library, I suggest you read the tutorial.
I wrote a Haskell module that lists all the contents of the directory in breadth-first order. The following is the source code.
module DirElements (dirElem) where
import System.Directory (getDirectoryContents, doesDirectoryExist)
import System.FilePath ((</>))
-- | List the contents of a directory tree level by level (breadth-first).
--
-- Starting from the single-element level @[dirPath]@, every following level
-- is produced by concatenating 'getDirectoryContents'' over all paths of the
-- previous level; iteration stops at the first empty level.  The initial
-- level (the root path itself) is dropped from the result.
dirElem :: FilePath -> IO [[FilePath]]
dirElem dirPath = do
  levels <- iterateM (not . null) (concatMapM getDirectoryContents') [dirPath]
  return (tail levels)
-- | Full paths of the entries directly inside @dirPath@.
--
-- Returns an empty list when @dirPath@ is not an existing directory (for
-- example, when it names a regular file).  Every returned entry is prefixed
-- with @dirPath@.
getDirectoryContents' :: FilePath -> IO [FilePath]
getDirectoryContents' dirPath = do
  isDir <- doesDirectoryExist dirPath
  if isDir then dirContent else return []
  where
    dirContent = do
      contents <- getDirectoryContents dirPath
      -- Remove the special entries by name: 'getDirectoryContents' does not
      -- promise that "." and ".." are the first two results, so discarding
      -- them positionally with @tail . tail@ could drop real entries instead.
      return (map (dirPath </>) (filter (`notElem` [".", ".."]) contents))
-- | Monadic analogue of 'iterate' that stops once the predicate fails.
--
-- @iterateM fb f x@ yields @x@, then the result of @f x@, then the result of
-- applying @f@ to that value, and so on, for as long as @fb@ accepts the
-- current value.  The first rejected value is not part of the result, and
-- @f@ is never run on a rejected value.
--
-- Notice: because of the way '>>=' sequences effects, this cannot be written
-- like 'iterate' (which yields an infinite list); hence the stop predicate.
iterateM :: Monad m => (a -> Bool) -> (a -> m a) -> a -> m [a]
iterateM fb f x
  | fb x = do
      next <- f x
      rest <- iterateM fb f next
      return (x : rest)
  | otherwise = return []
-- | Apply a monadic, list-producing function to every element and flatten
-- the per-element results into a single list, preserving their order.
concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM f list = do
  chunks <- mapM f list
  return (concat chunks)
It works fine, but when run on a large directory it “pauses” for a while and then prints all the results at once.
After some research, I found that this is the same problem as with `sequence $ map return [1..] :: [[Int]]`; see “Why the Haskell sequence function can’t be lazy or why recursive monadic functions can’t be lazy”.
I modified the older answer that Davorak linked to so that it uses the new pipes library.
It uses StateP to keep the queue of untraversed directories so that it can do breadth-first traversal. For convenience, it uses MaybeP to exit the loop.
import Control.Monad
import Control.Proxy
import Control.Proxy.Trans.Maybe
import Control.Proxy.Trans.State as S
import Data.Sequence hiding (filter)
import System.FilePath.Posix
import System.Directory
-- | Names of the entries in @path@, without the special entries @"."@ and
-- @".."@.  The names are returned as 'getDirectoryContents' gives them,
-- i.e. they are not prefixed with @path@.
getUsefulContents :: FilePath -> IO [FilePath]
getUsefulContents path =
  fmap (filter (`notElem` [".", ".."])) (getDirectoryContents path)
-- | Breadth-first producer of the paths below (and including) @path@.
--
-- The 'StateP' layer holds a 'Seq' used as a FIFO queue of paths that have
-- not been visited yet; the 'MaybeP' layer is used only to leave the
-- 'forever' loop (via 'mzero') once that queue is empty.
traverseTree
    :: (Proxy p)
    => FilePath
    -> () -> Producer (MaybeP (StateP (Seq FilePath) p)) FilePath IO r
traverseTree path () = do
    -- Seed the queue with the root path.
    liftP $ S.modify (|> path)
    forever $ do
        x <- liftP $ S.gets viewl
        case x of
            -- Nothing left to visit: terminate the producer.
            EmptyL    -> mzero
            file :< s -> do
                liftP $ S.put s
                -- Hand the path downstream before looking inside it.
                respond file
                isDir <- lift $ doesDirectoryExist file
                when isDir $ do
                    names <- lift $ getUsefulContents file
                    let namesfull = map (file </>) names
                    -- Append the children at the back of the queue, which
                    -- is what makes the traversal breadth-first.
                    liftP $ forM_ namesfull $ \name ->
                        S.modify (|> name)
This defines a lazy, breadth-first generator of file paths. If you connect it to a printing stage, it prints each path as the tree is being traversed:
-- Print every path found under "/tmp", in breadth-first order, as soon as
-- the traversal produces it.  The state layer starts with an empty queue.
main = runProxy $ evalStateK empty $ runMaybeK $
    traverseTree "/tmp" >-> putStrLnD
Laziness means that if you only need 3 files, it will only traverse the tree to generate three files as needed, and then it will stop:
-- Same pipeline as the plain printing example, but 'takeB_ 3' lets only
-- three paths through, so the traversal stops after the third one.
main = runProxy $ evalStateK empty $ runMaybeK $
    traverseTree "/tmp" >-> takeB_ 3 >-> putStrLnD
If you want to learn more about the pipes library, I suggest you read the tutorial.