diff options
| author | Li Zhineng <[email protected]> | 2025-04-28 17:20:39 +0800 |
|---|---|---|
| committer | Li Zhineng <[email protected]> | 2025-04-28 17:20:39 +0800 |
| commit | 5aca6a5eef938b1fdedb4d14f5f7edcac91b5215 (patch) | |
| tree | 0ad65fe3724d29e98ae352eac0487138f05035c6 /scripts/update-data.php | |
| download | region-china-5aca6a5eef938b1fdedb4d14f5f7edcac91b5215.tar.gz region-china-5aca6a5eef938b1fdedb4d14f5f7edcac91b5215.zip | |
build data from raw html
Diffstat (limited to 'scripts/update-data.php')
| -rw-r--r-- | scripts/update-data.php | 63 |
1 files changed, 63 insertions, 0 deletions
<?php

declare(strict_types=1);

/*
 * Rebuilds the generated region data from resources/raw.html.
 *
 * For every <tr> in the document, the second <td> is read as the region code
 * and the third <td> as the region name. Each digit-only code is handed,
 * together with its name, to every handler; afterwards each handler is asked
 * to export to its own file under resources/.
 *
 * The script exits with status 1, before anything is exported, when the HTML
 * cannot be loaded or when no region was found.
 */

require_once __DIR__.'/../vendor/autoload.php';

use Zhineng\Region\Build\CodeToNameHandler;
use Zhineng\Region\Build\RelationshipHandler;
use Zhineng\Region\Build\TopLevelHandler;

$rawHtmlPath = __DIR__.'/../resources/raw.html';

$doc = new \DOMDocument;

// LIBXML_NOERROR only silences libxml's markup error reports; a failed load
// is still signalled through the return value, so it has to be checked.
if (! $doc->loadHTMLFile($rawHtmlPath, \LIBXML_NOERROR)) {
    fwrite(\STDERR, sprintf("Unable to load HTML from %s\n", $rawHtmlPath));

    exit(1);
}

/** @var list<\Zhineng\Region\Build\RegionHandler> $handlers */
$handlers = [
    $codeToNameHandler = new CodeToNameHandler,
    $topLevelHandler = new TopLevelHandler,
    $relationshipHandler = new RelationshipHandler,
];

$count = 0;

foreach ($doc->getElementsByTagName('tr') as $row) {
    $columns = $row->getElementsByTagName('td');

    $columnCode = $columns->item(1);
    $columnName = $columns->item(2);

    // A row lacking a second or third <td> cannot describe a region.
    if (! $columnCode instanceof \DOMElement || ! $columnName instanceof \DOMElement) {
        continue;
    }

    $code = mb_trim($columnCode->textContent);

    // Accept digit-only codes. is_numeric() would also let through signed,
    // decimal and exponent forms such as "-1", "1.5" or "1e5".
    if (preg_match('/\A[0-9]+\z/', $code) !== 1) {
        continue;
    }

    // Drop any asterisks trailing the name.
    $name = rtrim(mb_trim($columnName->textContent), '*');

    $count++;

    foreach ($handlers as $handler) {
        $handler->handle($code, $name);
    }
}

// Do not export when nothing was parsed: the handlers would be exporting
// without having received a single region.
if ($count === 0) {
    fwrite(\STDERR, sprintf("No regions found in %s; nothing exported.\n", $rawHtmlPath));

    exit(1);
}

printf("- Export code to name map\n");
$codeToNameHandler->export(__DIR__.'/../resources/code-to-name.php');

printf("- Export top level list\n");
$topLevelHandler->export(__DIR__.'/../resources/top-levels.php');

printf("- Export relationship map\n");
$relationshipHandler->export(__DIR__.'/../resources/relationships.php');

printf("- Processed %d regions.\n", $count);
