Files
jungle/pkgs/intel-oneapi/deb/update.hs
Aleix Boné a5a0391b9c Only save relevant packages from intel repo
Refactored the parsing and dependency resolution logic into a single
Haskell script.
2026-03-20 16:57:36 +01:00

227 lines
7.6 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import Control.Arrow (second, (&&&))
import qualified Data.Map.Lazy as M
import qualified Data.Map.Strict as MS
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Data.Char (isAlpha, isNumber)
import Data.List (find, nub, sortOn)
import Data.Map.Lazy (Map)
import Data.Maybe (catMaybes, fromMaybe, maybeToList)
import Data.Ord (Down (Down))
import Data.Text.Read (decimal)
import Debug.Trace (trace, traceShow)
import System.Environment (getArgs)
import System.IO (stderr)
import Text.Printf (PrintfType, hPrintf, printf)
type Str = T.Text
type Data = Map Str [Package]
type Version = [Int]
data Package = Package
{ name :: Str
, version :: Version
, filename, sha256 :: Str
, size :: Int
, depends :: [Dependency]
}
deriving (Show, Eq)
data Dependency = Dependency
{ pkgName :: Str
, pkgVersion :: Maybe (Ordering', Version)
}
deriving (Show, Eq)
-- Standard Ordering cannot represent GE or LE, we have to cook our own
data Ordering' = GT' | LT' | EQ' | GE' | LE' deriving (Show, Eq)
-- PARSING
parse :: Str -> Data
parse =
M.map (sortOn (Down . version))
. M.fromListWith (++)
. fmap (second pure)
. catMaybes
. fmap
( (parsePackage . M.fromList)
. fmap (second (T.strip . T.drop 1) . T.breakOn ":")
. T.lines
. T.strip
)
. T.splitOn "\n\n"
parseDependency :: Str -> Dependency
parseDependency t = case (T.words t) of
(a : b : c : []) -> mkPkg a b c
(a : c : []) -> let (a', b) = T.breakOn "(" a in mkPkg a' b c -- workaround for packages that don't have a space between the name and the version parenthesis
(a : []) -> Dependency a Nothing
(b) -> traceShow b $ undefined
where
mkPkg a b c = Dependency a (Just ((toOrd' $ T.drop 1 b), (parseVersion $ T.dropEnd 1 c)))
toOrd' :: Str -> Ordering'
toOrd' ">>" = GT'
toOrd' "<<" = LT'
toOrd' "=" = EQ'
toOrd' ">=" = GE'
toOrd' "<=" = LE'
toOrd' t = traceShow t $ undefined
dropEmpty :: [Str] -> [Str]
dropEmpty = Prelude.filter (not . T.null)
parseVersion :: Str -> Version
parseVersion = fmap (unwrapEither . decimal) . dropEmpty . T.split (not . isNumber)
where
unwrapEither (Right (a, "")) = a
-- should never fail, since we filtered non numbers for the split and removed empty strings
unwrapEither (Left err) = trace err $ undefined
unwrapEither (Right (_, b)) = trace (T.unpack b) $ undefined
parsePackage :: Map Str Str -> Maybe (Str, Package)
parsePackage d =
(,)
<$> package
<*> ( Package
<$> package
<*> (parseVersion <$> d M.!? "Version")
<*> (d M.!? "Filename")
<*> (d M.!? "SHA256")
<*> (read . T.unpack <$> (d M.!? "Installed-Size"))
<*> (pure $ fmap parseDependency $ dropEmpty $ (>>= T.splitOn ",") $ maybeToList $ d M.!? "Depends")
)
where
package = d M.!? "Package"
-- DEPENDENCY SOLVER
-- Compare versions in debian control file syntax
-- See: https://www.debian.org/doc/debian-policy/ch-relationships.html#syntax-of-relationship-fields
--
-- NOTE: this is not a proper version comparison
--
-- A proper version solver, should aggregate dependencies with the same name
-- and compute the constraint (e.g. a (>= 2) a (<< 5) -> 2 <= a << 5)
--
-- But in the intel repo, there are no such "duplicated" dependencies to specify
-- upper limits, which leads to issues when intel-hpckit-2021 depends on things
-- like intel-basekit >= 2021.1.0-2403 and we end up installing the newest
-- basekit instead of the one from 2021.1
--
-- To mitigate this, >= is set to take the latest version with matching major
-- and minor (only revision and patch are allowed to change)
compareVersions :: Maybe (Ordering', Version) -> Version -> Bool
compareVersions Nothing _ = True
compareVersions (Just (kind, want)) got
| null want = True
| kind == GE' = and (Prelude.take 2 $ zipWith (==) got want) && result
| otherwise = result
where
result = matches (compare got want) kind
matches :: Ordering -> Ordering' -> Bool
matches EQ b = b `elem` [EQ', GE', LE']
matches LT b = b `elem` [LT', LE']
matches GT b = b `elem` [GT', GE']
findMatching :: Data -> Dependency -> Maybe Package
findMatching pkgList (Dependency name ver) = pkgList M.!? name >>= find (compareVersions (ver) . version)
getHpckit :: Data -> Version -> Maybe Package
getHpckit d v = findMatching d (Dependency "intel-hpckit" (Just (GE', v)))
-- TODO: replace maybeToList with proper error handling. Right now, if a
-- dependency is not found it is silently skipped.
solveDeps :: Data -> Package -> [Package]
solveDeps d p = removeDupes $ go ([], [p])
where
go (done, todo)
| null todo = done'
| otherwise = go (done', todo')
where
done' = filter (not . isBlacklisted) $ nub $ done ++ (filter isMetaPackage todo)
todo' = concatMap depends todo >>= maybeToList . findMatching d
isMetaPackage = ((/= 0) . size) -- Packages with size 0
removeDupes :: [Package] -> [Package]
removeDupes l = fmap snd . MS.toList $ MS.fromListWith (getLatest) $ (cleanName . name &&& id) <$> l
getLatest a b
| version a > version b = a
| otherwise = b
-- Remove trailing version information from package name
cleanName :: T.Text -> T.Text
cleanName = T.dropWhileEnd (not . isAlpha)
isBlacklisted :: Package -> Bool
isBlacklisted = (`elem` blacklist) . cleanName . name
where
blacklist =
[ "intel-basekit-env"
, "intel-basekit-getting-started"
, "intel-hpckit-env"
, "intel-hpckit-getting-started"
, "intel-oneapi-advisor"
, "intel-oneapi-common-licensing"
, "intel-oneapi-common-oneapi-vars"
, "intel-oneapi-common-vars"
, "intel-oneapi-compiler-cpp-eclipse-cfg"
, "intel-oneapi-compiler-dpcpp-eclipse-cfg"
, "intel-oneapi-condaindex"
, "intel-oneapi-dev-utilities-eclipse-cfg"
, "intel-oneapi-dpcpp-ct-eclipse-cfg"
, "intel-oneapi-eclipse-ide"
, "intel-oneapi-hpc-toolkit-getting-started"
, "intel-oneapi-icc-eclipse-plugin-cpp"
, "intel-oneapi-inspector"
, "intel-oneapi-vtune"
, "intel-oneapi-vtune-eclipse-plugin-vtune"
]
-- OUTPUT
showVer, showVerDash :: [Int] -> T.Text
showVer = T.dropEnd 1 . T.concat . zipWith (flip (<>)) ([".", ".", "-"] ++ repeat ".") . fmap (T.pack . show)
showVerDash = T.intercalate "-" . fmap (T.pack . show)
displayTOML :: (PrintfType t) => Version -> Package -> t
displayTOML kit (Package n v f h _ _) = printf "[[hpckit-%s]]\nname=\"%s\"\nversion=\"%s\"\nfile=\"%s\"\nsha256=\"%s\"\n\n" (showVerDash kit) (cleanName n) (showVer v) f h
-- Reads debian Package List from apt repo from stdin and outputs TOML with
-- hpckit versions passed as arguments
main :: IO ()
main = do
args <- fmap T.pack <$> getArgs
let v = case args of
[] -> ["2025"]
_ -> args
d <- parse <$> TIO.getContents
sizes <- mapM (processHpcKit d . parseVersion) v
hPrintf stderr "Total Installed-Size: %.3fMb\n" ((fromIntegral (sum sizes)) / 1e6 :: Double)
where
-- TODO: replace fromMaybe+trace with either error handling?
processHpcKit :: Data -> Version -> IO Int
processHpcKit d v = do
let kit = getHpckit d v
deps = fromMaybe (trace (T.unpack $ "Could not find deps for kit: " <> showVer v <> " (SKIPPED)") $ []) $ solveDeps d <$> kit
totalSize = sum $ size <$> deps
mapM_ (displayTOML v) deps
return totalSize