Export

Install https://github.com/famzah/google-sites-backup

# Run the backup script, passing the gdata-python-client/ and google-sites-backup/ directories as arguments.
google-sites-backup/run.sh gdata-python-client/ google-sites-backup/

Convert to Markdown

Install reverse_markdown

cd into the exported project directory.

# Convert every exported HTML page to a Markdown file next to the original.
# Each path is handed to sh as "$1" and quoted, so names containing spaces or
# shell metacharacters are handled; "${1%.*}" drops the .html extension.
find . -iname "*.html" -exec sh -c 'tidy -q -omit -b -i -c "$1" | reverse_markdown > "${1%.*}.md"' _ {} \;
# Remove the HTML sources once they have been converted.
find . -iname "*.html" -exec rm -- {} +

Cleanup

Add frontmatter

# One pass per file (whole file slurped with -0777): remove the
# <head>...</head> section, then replace the first "# Title" heading line with
# a Jekyll front matter block that uses the heading text as the title.
find . -iname "*.md" -exec perl -0777 -i -pe 's/<head>.*<\/head>//igs; s/^# (.*)$/---\nlayout: page\ntitle: $1\n---/m' {} \;

Clean up leftover stray "Â" characters, lines containing only whitespace or table pipes, anything preceding the front matter, and single leading spaces.

# Strip stray "Â" characters. This uses perl -i like the other steps because
# `sed -i''` reaches sed as a bare `-i`, which BSD/macOS sed misparses (it
# takes the next argument as a backup suffix); find -exec also copes with
# spaces in file names.
find . -iname "*.md" -exec perl -i -pe 's/Â//g' {} \;
# Empty out lines that contain only whitespace and/or "|" characters.
find . -iname "*.md" -exec perl -0777 -i -pe 's/^[\s\|]*$//gm' {} \;
# Delete everything that precedes the first "---" so the file begins at that marker.
find . -iname "*.md" -exec perl -0777 -i -pe 's/^.*?---/---/ms' {} \;
# Remove the leading space from lines that start with exactly one space.
find . -iname "*.md" -exec perl -i -pe 's/^ ([^ ].*)$/$1/g' {} \;

Remove absolute links

# Strip the absolute Google Sites prefix from links in every matching file.
# File names are passed NUL-delimited (--print0 / xargs -0) so paths that
# contain spaces reach perl intact.
ack --ignore-dir=_site -l --print0 "sites.google.com\/a\/roximity.com\/wiki" | xargs -0 perl -i -pe "s/https:\/\/sites\.google\.com\/a\/roximity\.com\/wiki//g"

Fix resource links

# Drop the "/_/rsrc/<digits>/" prefix from resource links in every matching
# file. File names are passed NUL-delimited (--print0 / xargs -0) so paths
# that contain spaces reach perl intact.
ack --ignore-dir=_site -l --print0 "\/_\/rsrc\/\d*\/" | xargs -0 perl -i -pe "s/\/_\/rsrc\/\d*\///g"

Rename %20 to underscores in file names.

# Replace "%20" with "_" in file and directory names.
# -depth visits a directory's contents before the directory itself, and only
# the final path component is rewritten, so renaming a directory never
# invalidates the paths of entries that are still waiting to be renamed.
# Reading line by line with quoted expansions keeps names with spaces intact.
find . -depth -name "*%20*" | while IFS= read -r path; do
  dir=$(dirname "$path")
  base=$(basename "$path")
  mv -v "$path" "$dir/$(printf '%s' "$base" | sed 's/%20/_/g')"
done

Even after these steps, the converted Markdown still needed a fair amount of manual cleanup.

Plugins

These make the structure and navigation roughly match the original Google Sites layout.

Many of our pages had files attached as downloads. I like the idea of putting downloads in a sub directory and having them auto-populate on the page. Some of our navigation is also based on pages in a matching directory. This plugin populates a sub_pages collection and a downloads collection on each page, and the view below renders those collections.

module AssociateRelatedPages
  # Jekyll generator that looks, for every page, for a directory at
  # <site.source>/<page.dir>/<page.basename>. When that directory exists it
  # fills in two entries of the page's data:
  #
  # * "sub_pages" - the site pages matching the ".md" files in the directory
  # * "downloads" - one hash ("title", "url", "size") for every other
  #   non-directory entry in the directory
  #
  # Pages without such a directory are left untouched.
  class Generator < Jekyll::Generator
    # @param site [Jekyll::Site] the site being generated
    def generate(site)
      # Index pages by "/" + path so a directory listing can be mapped back
      # to the page objects Jekyll already loaded.
      page_lookup = site.pages.each_with_object({}) do |page, lookup|
        lookup["/" + page.path] = page
      end

      site.pages.each do |page|
        subdir = File.join(site.source, page.dir, page.basename)
        next unless File.directory?(subdir)

        # Dir.entries returns names in filesystem order; sort them so the
        # generated lists are stable from build to build.
        entries = Dir.entries(subdir).sort
        markdown, others = entries.partition { |entry| entry.end_with?(".md") }

        # A ".md" file that Jekyll did not load as a page has no lookup
        # entry; compact drops it instead of leaving a nil in the list.
        page.data["sub_pages"] = markdown.map { |entry|
          page_lookup[File.join(page.dir, page.basename, entry)]
        }.compact

        page.data["downloads"] = others.reject { |entry|
          entry == "." || entry == ".." || File.directory?(File.join(subdir, entry))
        }.map { |entry|
          {
            "title" => entry,
            "url" => File.join(page.basename, entry),
            "size" => File.size(File.join(subdir, entry))
          }
        }
      end
    end
  end
end
{% comment %}
  Lists the entries of page.sub_pages as links. The loop variable is named
  sub_page so that it does not shadow the global page object inside the loop.
{% endcomment %}
{% if page.sub_pages.size > 0 %}
  <ul>
  {% for sub_page in page.sub_pages %}
    <li>
      <a href="{{ sub_page.url | prepend: site.baseurl }}">{{ sub_page.title }}</a>
    </li>
  {% endfor %}
  </ul>
{% endif %}
{% comment %}
  Renders page.downloads as a "Downloads" section: one link per entry,
  labelled with the entry's title and its size followed by "b".
  Nothing is rendered unless page.downloads.size is greater than zero.
{% endcomment %}
{% if page.downloads.size > 0 %}
  <div class="post-downloads">
    <h2>Downloads</h2>
    <ul>
    {% for download in page.downloads %}
      <li>
        <a href="{{ download.url | prepend: site.baseurl }}">{{ download.title }} ({{ download.size }}b)</a>
      </li>
    {% endfor %}
    </ul>
  </div>
{% endif %}

The navigation on the Google Site was mostly based on sub directories. This plugin creates a nav collection, which the template below uses to build the navigation.

module HierarchicalNavigation
  # Jekyll generator that groups pages by the second segment of page.dir
  # (the empty string when there is none) and stores the groups, as an
  # array, in site.data['nav'].
  #
  # Each group has the shape { 'page' => Page or nil, 'sub' => [Page, ...] }:
  # the page whose basename is "index" becomes 'page', every other page in
  # the group is appended to 'sub'. Pages whose dir splits into more than
  # two segments are ignored.
  class Generator < Jekyll::Generator
    def generate(site)
      sections = {}

      site.pages.sort_by(&:dir).each do |page|
        segments = page.dir.split('/')
        next if segments.count > 2

        key = segments[1] || ''
        section = (sections[key] ||= {'page' => nil, 'sub' => []})

        if page.basename == 'index'
          section['page'] = page
        else
          section['sub'] << page
        end
      end

      site.data['nav'] = sections.values
    end
  end
end
{% comment %}
  Renders the navigation from site.data['nav'].
  - Entries whose page has no title are skipped.
  - An entry gets the "active" class when the current page URL contains the
    entry page's URL.
  - An entry's sub list is only rendered when the current page URL contains
    the entry page's dir.
  - sub_dir is the sub page URL with ".html" removed and "/" appended; a sub
    item is active when the current page URL contains the sub page URL or
    the current page's dir equals sub_dir.
{% endcomment %}
<ul>
{% for nav in site.data['nav'] %}
  {% if nav.page.title %}
  <li class="{% if page.url contains nav.page.url %}active{% endif %}">
    <a class="page-link" href="{{ nav.page.url | prepend: site.baseurl }}">{{ nav.page.title }}</a>
    {% if page.url contains nav.page.dir %}
      <ul>
      {% for sub in nav.sub %}
        {% if sub.title %}
          {% capture sub_dir %}{{ sub.url | remove: ".html" | append: "/" }}{% endcapture %}
          <li class="{% if page.url contains sub.url or page.dir ==  sub_dir %}active{% endif %}">
            <a class="page-link" href="{{ sub.url | prepend: site.baseurl }}">{{ sub.title }}</a>
          </li>
        {% endif %}
      {% endfor %}
      </ul>
    {% endif %}
  </li>
  {% endif %}
{% endfor %}
</ul>