{-# LANGUAGE CPP, ScopedTypeVariables #-}
{- |
   Module      : Data.FileStore.Utils
   Copyright   : Copyright (C) 2009 John MacFarlane, Gwern Branwen
   License     : BSD 3

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

   Utility functions for running external processes.
-}

module Data.FileStore.Utils (
          runShellCommand
        , mergeContents
        , hashsMatch
        , escapeRegexSpecialChars
        , parseMatchLine
        , splitEmailAuthor
        , ensureFileExists
        , regSearchFiles
        , regsSearchFile
        , withSanityCheck
        , grepSearchRepo 
        , withVerifyDir
        , encodeArg ) where

import Control.Exception (throwIO)
import Control.Applicative ((<$>))
import Control.Monad (liftM, liftM2, when, unless)
import Data.ByteString.Lazy.UTF8 (toString)
import Data.Char (isSpace)
import Data.List (intersect, nub, isPrefixOf, isInfixOf)
import Data.List.Split (splitWhen)
import Data.Maybe (isJust)
import System.Directory (doesFileExist, getTemporaryDirectory, removeFile, findExecutable, createDirectoryIfMissing, getDirectoryContents)
import System.Exit (ExitCode(..))
import System.FilePath ((</>), takeDirectory)
import System.IO (openTempFile, hClose)
import System.IO.Error (isDoesNotExistError)
import System.Process (runProcess, waitForProcess)
import System.Environment (getEnvironment)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString as S
import qualified Control.Exception as E
#if MIN_VERSION_base(4,5,0)
#else
import Codec.Binary.UTF8.String (encodeString)
#endif

import Data.FileStore.Types (SearchMatch(..), FileStoreError(IllegalResourceName, NotFound, UnknownError), SearchQuery(..))

-- | Encode argument for raw command.
encodeArg :: String -> String
#if MIN_VERSION_base(4,5,0)
encodeArg :: String -> String
encodeArg = String -> String
forall a. a -> a
id
#else
encodeArg = encodeString
#endif

-- | Run shell command and return error status, standard output, and error output.  Assumes
-- UTF-8 locale. Note that this does not actually go through \/bin\/sh!
runShellCommand :: FilePath                     -- ^ Working directory
                -> Maybe [(String, String)]     -- ^ Environment
                -> String                       -- ^ Command
                -> [String]                     -- ^ Arguments
                -> IO (ExitCode, B.ByteString, B.ByteString)
runShellCommand :: String
-> Maybe [(String, String)]
-> String
-> [String]
-> IO (ExitCode, ByteString, ByteString)
runShellCommand String
workingDir Maybe [(String, String)]
environment String
command [String]
optionList = do
  tempPath <- IO String -> (SomeException -> IO String) -> IO String
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
E.catch IO String
getTemporaryDirectory (\(SomeException
_ :: E.SomeException) -> String -> IO String
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return String
".")
  (outputPath, hOut) <- openTempFile tempPath "out"
  (errorPath, hErr) <- openTempFile tempPath "err"
  env <- liftM2 (++) environment . Just <$> getEnvironment
  hProcess <- runProcess (encodeArg command) (map encodeArg optionList) (Just workingDir) env Nothing (Just hOut) (Just hErr)
  status <- waitForProcess hProcess
  errorOutput <- S.readFile errorPath
  output <- S.readFile outputPath
  removeFile errorPath
  removeFile outputPath
  return (status, B.fromChunks [errorOutput], B.fromChunks [output])

-- | Do a three way merge, using either git merge-file or RCS merge.  Assumes
-- that either @git@ or @merge@ is in the system path.  Assumes UTF-8 locale.
mergeContents :: (String, B.ByteString)     -- ^ (label, contents) of edited version
              -> (String, B.ByteString)     -- ^ (label, contents) of original revision
              -> (String, B.ByteString)     -- ^ (label, contents) of latest version
              -> IO (Bool, String)          -- ^ (were there conflicts?, merged contents)
mergeContents :: (String, ByteString)
-> (String, ByteString)
-> (String, ByteString)
-> IO (Bool, String)
mergeContents (String
newLabel, ByteString
newContents) (String
originalLabel, ByteString
originalContents) (String
latestLabel, ByteString
latestContents) = do
  tempPath <- IO String -> (SomeException -> IO String) -> IO String
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
E.catch IO String
getTemporaryDirectory (\(SomeException
_ :: E.SomeException) -> String -> IO String
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return String
".")
  (originalPath, hOriginal) <- openTempFile tempPath "orig"
  (latestPath, hLatest)     <- openTempFile tempPath "latest"
  (newPath, hNew)           <- openTempFile tempPath "new"
  B.hPutStr hOriginal originalContents >> hClose hOriginal
  B.hPutStr hLatest latestContents >> hClose hLatest
  B.hPutStr hNew newContents >> hClose hNew
  gitExists <- liftM isJust (findExecutable "git")
  (conflicts, mergedContents) <-
    if gitExists
       then do
         (status, err, out) <- runShellCommand tempPath Nothing "git" ["merge-file", "--stdout", "-L", newLabel, "-L",
                                     originalLabel, "-L", latestLabel, newPath, originalPath, latestPath]
         case status of
              ExitCode
ExitSuccess             -> (Bool, ByteString) -> IO (Bool, ByteString)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
False, ByteString
out)
              ExitFailure Int
n | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0  -> (Bool, ByteString) -> IO (Bool, ByteString)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
True, ByteString
out)
              ExitCode
_                       -> String -> IO (Bool, ByteString)
forall a. HasCallStack => String -> a
error (String -> IO (Bool, ByteString))
-> String -> IO (Bool, ByteString)
forall a b. (a -> b) -> a -> b
$ String
"merge failed: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ ByteString -> String
toString ByteString
err
       else do
         mergeExists <- liftM isJust (findExecutable "merge")
         if mergeExists
            then do
               (status, err, out) <- runShellCommand tempPath Nothing "merge" ["-p", "-q", "-L", newLabel, "-L",
                                          originalLabel, "-L", latestLabel, newPath, originalPath, latestPath]
               case status of
                    ExitCode
ExitSuccess             -> (Bool, ByteString) -> IO (Bool, ByteString)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
False, ByteString
out)
                    ExitFailure Int
1           -> (Bool, ByteString) -> IO (Bool, ByteString)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
True, ByteString
out)
                    ExitCode
_                       -> String -> IO (Bool, ByteString)
forall a. HasCallStack => String -> a
error (String -> IO (Bool, ByteString))
-> String -> IO (Bool, ByteString)
forall a b. (a -> b) -> a -> b
$ String
"merge failed: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ ByteString -> String
toString ByteString
err
            else error "mergeContents requires 'git' or 'merge', and neither was found in the path."
  removeFile originalPath
  removeFile latestPath
  removeFile newPath
  return (conflicts, toString mergedContents)

escapeRegexSpecialChars :: String -> String
escapeRegexSpecialChars :: String -> String
escapeRegexSpecialChars = String -> String -> String
forall {t :: * -> *}. Foldable t => t Char -> String -> String
backslashEscape String
"?*+{}[]\\^$.()"
  where backslashEscape :: t Char -> String -> String
backslashEscape t Char
chars (Char
x:String
xs) | Char
x Char -> t Char -> Bool
forall a. Eq a => a -> t a -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
chars = Char
'\\' Char -> String -> String
forall a. a -> [a] -> [a]
: Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: t Char -> String -> String
backslashEscape t Char
chars String
xs
        backslashEscape t Char
chars (Char
x:String
xs)                  = Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: t Char -> String -> String
backslashEscape t Char
chars String
xs
        backslashEscape t Char
_ []                          = []

-- | A number of VCS systems uniquely identify a particular revision or change via a
--   cryptographic hash of some sort. These hashs can be very long, and so systems like
--   Git and Darcs don't require the entire hash - a *unique prefix*. Thus a definition
--   of hash equality is '==', certainly, but also simply whether either is a prefix of the
--   other. If both are reasonably long, then the likelihood the shorter one is not a unique
--   prefix of the longer (that is, clashes with another hash) is small.
--   The burden of proof is on the caller to not pass a uselessly short short-hash like '1', however.
hashsMatch :: (Eq a) => [a] -> [a] -> Bool
hashsMatch :: forall a. Eq a => [a] -> [a] -> Bool
hashsMatch [a]
r1 [a]
r2 = [a]
r1 [a] -> [a] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` [a]
r2 Bool -> Bool -> Bool
|| [a]
r2 [a] -> [a] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` [a]
r1

-- | Inquire of a certain directory whether another file lies within its ambit.
--   This is basically asking whether the file is 'above' the directory in the filesystems's
--   directory tree. Useful for checking the legality of a filename.
--   Note: due to changes in canonicalizePath in ghc 7, we no longer have
--   a reliable way to do this; so isInsideDir is False whenever either
--   the file or the directory contains "..".
isInsideDir :: FilePath -> FilePath -> Bool
isInsideDir :: String -> String -> Bool
isInsideDir String
name String
dir = String
dir String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
name
  Bool -> Bool -> Bool
&& Bool -> Bool
not (String
".." String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isInfixOf` String
dir) Bool -> Bool -> Bool
&& Bool -> Bool
not (String
".." String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isInfixOf` String
name)

-- | A parser function. This is intended for use on strings which are output by grep programs
--   or programs which mimic the standard grep output - which uses colons as delimiters and has
--   3 fields: the filename, the line number, and then the matching line itself. Note that this 
--   is for use on only strings meeting that format - if it goes "file:match", this will throw
--   a pattern-match exception.
--
-- > parseMatchLine "foo:10:bar baz quux" ~> 
-- > SearchMatch {matchResourceName = "foo", matchLineNumber = 10, matchLine = "bar baz quux"}
parseMatchLine :: String -> SearchMatch
parseMatchLine :: String -> SearchMatch
parseMatchLine String
str =
  let (String
fn:String
n:String
res:[String]
_) = (Char -> Bool) -> String -> [String]
forall a. (a -> Bool) -> [a] -> [[a]]
splitWhen (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
':') String
str
  in  SearchMatch{matchResourceName :: String
matchResourceName = String
fn, matchLineNumber :: Integer
matchLineNumber = String -> Integer
forall a. Read a => String -> a
read String
n, matchLine :: String
matchLine = String
res}

-- | Our policy is: if the input is clearly a "name \<e\@mail.com\>" input, then we return "(Just Address, Name)"
--   If there is no '<' in the input, then it clearly can't be of that format, and so we just return "(Nothing, Name)"
--
-- > splitEmailAuthor "foo bar baz@gmail.com" ~> (Nothing,"foo bar baz@gmail.com")
-- > splitEmailAuthor "foo bar <baz@gmail.com>" ~> (Just "baz@gmail.com","foo bar")
splitEmailAuthor :: String -> (Maybe String, String)
splitEmailAuthor :: String -> (Maybe String, String)
splitEmailAuthor String
x = (Maybe String
mbEmail, String -> String
trim String
name)
  where (String
name, String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'<') String
x
        mbEmail :: Maybe String
mbEmail = if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
rest
                     then Maybe String
forall a. Maybe a
Nothing
                     else String -> Maybe String
forall a. a -> Maybe a
Just (String -> Maybe String) -> String -> Maybe String
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'>') (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ Int -> String -> String
forall a. Int -> [a] -> [a]
drop Int
1 String
rest

-- | Trim leading and trailing spaces
trim :: String -> String
trim :: String -> String
trim = String -> String
forall a. [a] -> [a]
reverse (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace

-- | Search multiple files with a single regexp.
--   This calls out to grep, and so supports the regular expressions grep does.
regSearchFiles :: FilePath -> [String] -> String -> IO [String]
regSearchFiles :: String -> [String] -> String -> IO [String]
regSearchFiles String
repo [String]
filesToCheck String
pattern = do (_, _, result) <- String
-> Maybe [(String, String)]
-> String
-> [String]
-> IO (ExitCode, ByteString, ByteString)
runShellCommand String
repo
                                                             Maybe [(String, String)]
forall a. Maybe a
Nothing  String
"grep" ([String] -> IO (ExitCode, ByteString, ByteString))
-> [String] -> IO (ExitCode, ByteString, ByteString)
forall a b. (a -> b) -> a -> b
$ [String
"--line-number", String
"-I", String
"-l", String
"-E", String
"-e", String
pattern] [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String]
filesToCheck
                                              let results = [String] -> [String] -> [String]
forall a. Eq a => [a] -> [a] -> [a]
intersect [String]
filesToCheck ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ String -> [String]
lines (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ ByteString -> String
toString ByteString
result
                                              return results

-- | Search a single file with multiple regexps.
regsSearchFile :: [String] -> FilePath -> [String] -> String -> IO [String]
regsSearchFile :: [String] -> String -> [String] -> String -> IO [String]
regsSearchFile [String]
os String
repo [String]
patterns String
file = do res <- (String -> IO [String]) -> [String] -> IO [[String]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (String -> String -> IO [String]
run String
file) [String]
patterns
                                          return $ nub $ concat res
      where run :: String -> String -> IO [String]
run String
f String
p = do (_,_,r) <- String
-> Maybe [(String, String)]
-> String
-> [String]
-> IO (ExitCode, ByteString, ByteString)
runShellCommand String
repo Maybe [(String, String)]
forall a. Maybe a
Nothing String
"grep" ([String]
os [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String
p, String
f])
                         return $ lines $ toString r

-- | If name doesn't exist in repo or is not a file, throw 'NotFound' exception.
ensureFileExists :: FilePath -> FilePath -> IO ()
ensureFileExists :: String -> String -> IO ()
ensureFileExists String
repo String
name = do
  isFile <- String -> IO Bool
doesFileExist (String
repo String -> String -> String
</> String -> String
encodeArg String
name)
  unless isFile $ throwIO NotFound

-- | Check that the filename/location is within the given repo, and not inside
-- any of the (relative) paths in @excludes@.  Create the directory if needed.
-- If everything checks out, then perform the specified action.
withSanityCheck :: FilePath
                -> [FilePath]
                -> FilePath
                -> IO b 
                -> IO b
withSanityCheck :: forall b. String -> [String] -> String -> IO b -> IO b
withSanityCheck String
repo [String]
excludes String
name IO b
action = do
  let filename :: String
filename = String
repo String -> String -> String
</> String -> String
encodeArg String
name
  let insideRepo :: Bool
insideRepo = String
filename String -> String -> Bool
`isInsideDir` String
repo
  let insideExcludes :: Bool
insideExcludes = [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
or ([Bool] -> Bool) -> [Bool] -> Bool
forall a b. (a -> b) -> a -> b
$ (String -> Bool) -> [String] -> [Bool]
forall a b. (a -> b) -> [a] -> [b]
map (String
filename String -> String -> Bool
`isInsideDir`)
                          ([String] -> [Bool]) -> [String] -> [Bool]
forall a b. (a -> b) -> a -> b
$ (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (String
repo String -> String -> String
</>) [String]
excludes
  Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool
insideExcludes Bool -> Bool -> Bool
|| Bool -> Bool
not Bool
insideRepo)
    (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ FileStoreError -> IO ()
forall e a. (HasCallStack, Exception e) => e -> IO a
throwIO FileStoreError
IllegalResourceName
  Bool -> String -> IO ()
createDirectoryIfMissing Bool
True (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> String
takeDirectory String
filename
  IO b
action

-- | Uses grep to search a file-based repository. Note that this calls out to grep; and so
--   is generic over repos like git or darcs-based repos. (The git FileStore instance doesn't
--   use this because git has builtin grep functionality.)
--   Expected usage is to specialize this function with a particular backend's 'index'.
grepSearchRepo :: (FilePath -> IO [String]) -> FilePath -> SearchQuery -> IO [SearchMatch]
grepSearchRepo :: (String -> IO [String])
-> String -> SearchQuery -> IO [SearchMatch]
grepSearchRepo String -> IO [String]
indexer String
repo SearchQuery
query = do
  let opts :: [String]
opts = [String
"-I", String
"--line-number", String
"--with-filename"] [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++
             [String
"-i" | SearchQuery -> Bool
queryIgnoreCase SearchQuery
query] [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++
             (if SearchQuery -> Bool
queryWholeWords SearchQuery
query then [String
"--word-regexp"] else [String
"-E"])
  let regexps :: [String]
regexps = (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
escapeRegexSpecialChars ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ SearchQuery -> [String]
queryPatterns SearchQuery
query
  files <- String -> IO [String]
indexer String
repo
  if queryMatchAll query
     then do
       filesMatchingAllPatterns <- liftM (foldr1 intersect) $ mapM (regSearchFiles repo files) regexps
       output <- mapM (regsSearchFile opts repo regexps) filesMatchingAllPatterns
       return $ map parseMatchLine $ concat output
     else do
       (_status, _errOutput, output) <-
            runShellCommand repo Nothing "grep" $ opts ++
                                                  concatMap (\String
term -> [String
"-e", String
term]) regexps ++
                                                  files
       let results = String -> [String]
lines (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ ByteString -> String
toString ByteString
output
       return $ map parseMatchLine results

-- | we don't actually need the contents, just want to check that the directory exists and we have enough permissions
withVerifyDir :: FilePath -> IO a -> IO a
withVerifyDir :: forall a. String -> IO a -> IO a
withVerifyDir String
d IO a
a =
  IO a -> (IOException -> IO a) -> IO a
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
E.catch (([String] -> String) -> IO [String] -> IO String
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [String] -> String
forall a. HasCallStack => [a] -> a
head (String -> IO [String]
getDirectoryContents (String -> IO [String]) -> String -> IO [String]
forall a b. (a -> b) -> a -> b
$ String -> String
encodeArg String
d) IO String -> IO a -> IO a
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> IO a
a) ((IOException -> IO a) -> IO a) -> (IOException -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \(IOException
e :: E.IOException) ->
    if IOException -> Bool
isDoesNotExistError IOException
e
       then FileStoreError -> IO a
forall e a. (HasCallStack, Exception e) => e -> IO a
throwIO FileStoreError
NotFound
       else FileStoreError -> IO a
forall e a. (HasCallStack, Exception e) => e -> IO a
throwIO (FileStoreError -> IO a)
-> (IOException -> FileStoreError) -> IOException -> IO a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> FileStoreError
UnknownError (String -> FileStoreError)
-> (IOException -> String) -> IOException -> FileStoreError
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IOException -> String
forall a. Show a => a -> String
show (IOException -> IO a) -> IOException -> IO a
forall a b. (a -> b) -> a -> b
$ IOException
e