I’m working through converting my blog from Drupal to Jekyll (it’s a long story) and one of the things I needed to do was
to convert a bunch of posts originally written in HTML into Markdown. With a little application of PowerShell, most of
the heavy lifting was done fairly quickly - leaving just a manual review and tweak of each post.
Here’s the core of the PowerShell script I used:
# Convert every post matching .\_posts\*.md from HTML markup to Markdown and
# write the result to .\_processed under the same file name.
#
# Assumptions to confirm before running:
#   - .\_processed already exists (the script does not create it).
#   - Wrap-String is available; it is not a built-in cmdlet, so it must be
#     defined or imported elsewhere. It is called as: Wrap-String <text> <width>.
foreach ($source in (Get-ChildItem .\_posts\*.md)) {
    $sourceName = $source.Name
    Write-Host $sourceName

    # Load the file as one CRLF-delimited string. FullName is used so the read
    # does not depend on how a FileInfo happens to convert to a string.
    $content = (Get-Content $source.FullName) -join "`r`n"

    # Convert links from <a> to Markdown style
    $content = $content -replace '<a\s+href="([^"]+)">([^<]+)</a>', '[$2]($1)'

    # Convert paragraphs and lists. The list wrappers are replaced one tag at a
    # time, in this order, so surrounding whitespace is absorbed exactly as a
    # sequence of individual replacements would absorb it.
    foreach ($tag in '<ul>', '</ul>', '<ol>', '</ol>') {
        $content = $content -replace "\s*$tag\s*", "`r`n"
    }
    $content = $content -replace '</?p>', "`r`n"
    $content = $content -replace '<li>', "`r`n* "
    $content = $content -replace '</li>', ''

    # Word wrap each paragraph (each CRLF-delimited line) at 120 columns
    $content = ($content -split "`r`n" | ForEach-Object { Wrap-String $_ 120 }) -join "`r`n"

    # Word/phrase highlighting
    $content = $content -replace '</?em>', '*'
    $content = $content -replace '</?(b|strong)>', '**'
    # The HTML entity for a double quote becomes a plain apostrophe
    $content = $content -replace '&quot;', "'"
    $content = $content -replace '<!--break-->', ''

    # Eliminate excess whitespace:
    #  1. blank out lines that contain only spaces/tabs ((?m) makes ^ and $
    #     match per line; the lookahead tolerates the CR before each LF);
    #  2. collapse any run of three or more line breaks down to two, i.e. at
    #     most one empty line between paragraphs.
    $content = $content -replace '(?m)^[ \t]+(?=\r?$)', ''
    $content = $content -replace '(\r\n){3,}', "`r`n`r`n"

    Set-Content -Path ".\_processed\$sourceName" -Value $content
}
Comments
blog comments powered by Disqus