<?php

/*
 * scripts/update-data.php
 *
 * Builds the generated region data files from resources/raw.html. Every
 * table row whose second cell holds a numeric code is passed, together with
 * the name found in the third cell, to each handler; the handlers then
 * export their results as PHP files under resources/.
 *
 * Recovered from commit 5aca6a5eef938b1fdedb4d14f5f7edcac91b5215
 * ("build data from raw html", Li Zhineng, 2025-04-28).
 *
 * NOTE(review): the original preamble between the opening tag and the
 * loadHTMLFile() call was lost when this patch was extracted. It presumably
 * loaded the autoloader and imported CodeToNameHandler, TopLevelHandler and
 * RelationshipHandler -- restore it from the repository before running.
 */

$source = __DIR__.'/../resources/raw.html';

// Reconstructed receiver: loadHTMLFile() and getElementsByTagName(), both
// called on $doc below, are DOMDocument methods.
$doc = new \DOMDocument;

// LIBXML_NOERROR suppresses libxml error reports while parsing. A load
// failure must stop the script here: continuing would find no rows and
// overwrite the generated files with empty data.
if ($doc->loadHTMLFile($source, \LIBXML_NOERROR) === false) {
    fwrite(\STDERR, sprintf("Unable to load HTML from %s\n", $source));
    exit(1);
}

$codeToNameHandler = new CodeToNameHandler;
$topLevelHandler = new TopLevelHandler;
$relationshipHandler = new RelationshipHandler;

/** @var list<\Zhineng\Region\Build\RegionHandler> $handlers */
$handlers = [
    $codeToNameHandler,
    $topLevelHandler,
    $relationshipHandler,
];

// Number of rows that were accepted and forwarded to the handlers.
$count = 0;

foreach ($doc->getElementsByTagName('tr') as $row) {
    $columns = $row->getElementsByTagName('td');

    // The code is read from the second cell and the name from the third;
    // rows that do not have both cells are skipped.
    $columnCode = $columns->item(1);
    $columnName = $columns->item(2);

    if (! $columnCode instanceof \DOMElement || ! $columnName instanceof \DOMElement) {
        continue;
    }

    // mb_trim() requires PHP 8.4 or later.
    $code = mb_trim($columnCode->textContent);

    // Rows whose code cell is not numeric are not data rows.
    if (! is_numeric($code)) {
        continue;
    }

    // Drop any trailing asterisks that follow the name.
    $name = rtrim(mb_trim($columnName->textContent), '*');

    $count++;

    foreach ($handlers as $handler) {
        $handler->handle($code, $name);
    }
}

// Refuse to export when nothing matched, so that an unexpected change in the
// raw HTML cannot silently replace the generated files with empty data.
if ($count === 0) {
    fwrite(\STDERR, sprintf("No region rows found in %s; nothing exported.\n", $source));
    exit(1);
}

echo "- Export code to name map\n";
$codeToNameHandler->export(__DIR__.'/../resources/code-to-name.php');

echo "- Export top level list\n";
$topLevelHandler->export(__DIR__.'/../resources/top-levels.php');

echo "- Export relationship map\n";
$relationshipHandler->export(__DIR__.'/../resources/relationships.php');

printf("- Processed %d regions.\n", $count);