| document.addEventListener("DOMContentLoaded", function() { |
// ---- DOM handles -----------------------------------------------------------
// Every element is looked up once by id; the ids must exist in the host page.
const byId = (id) => document.getElementById(id);

const select = byId('language-family-select');
const hoverBox = byId('hover-box');
const searchBar = byId('search-bar');
const depthSelector = byId('depth-number');
const tokenizerFilterDiv = byId('tokenizer-filter');
const scriptFiltersDiv = byId('script-filter');
const showNonGlotlidCheckbox = byId('show-non-glotlid');
const clearTokenizersFilter = byId('clear-tokenizers');
const clearScriptsFilter = byId('clear-scripts');

// ---- Mutable view state ----------------------------------------------------
// Id of the node the next redraw should centre on; null when there is none.
let nodeToCenter = null;
// The loaded tree, after ids and subtree sizes were attached; null until loaded.
let currentTreeData = null;
// Set of node ids whose children are expanded; assigned when a tree is loaded.
let expandedNodes;
// Last zoom/pan transform, kept so a redraw does not reset the view.
let currentTransform = d3.zoomIdentity;

// ---- Filter state (filled in when a tree is loaded) ------------------------
let tokenizerNames;
let scriptNames;
// Ordinal colour scale keyed by tokenizer name.
let color;
|
|
/**
 * Marks a node as expanded or collapsed in `expandedNodes`.
 *
 * @param {*} id - Node id to update.
 * @param {boolean} [state=true] - Truthy to expand, falsy to collapse.
 */
function setExpanded(id, state = true) {
  if (!state) {
    // Set#delete is a no-op for ids that are not present.
    expandedNodes.delete(id);
    return;
  }
  expandedNodes.add(id);
}
|
|
/**
 * Returns a copy of `tree` in which every node carries:
 *   - `id`: a sequential number assigned in pre-order (the root gets 0),
 *   - `children`: always an array (empty for leaves),
 *   - `subtreeSize`: the number of leaves below the node (1 for a leaf).
 *
 * The input objects are not modified; a node without a `children` property
 * is treated as a leaf instead of throwing.
 *
 * @param {object} tree - Root node; children are read from `tree.children`.
 * @returns {object} The annotated copy of the tree.
 */
function addNodeIds(tree) {
  let nextId = 0;

  function annotate(node) {
    // Take the id before descending so numbering is pre-order.
    const id = nextId++;
    const children = (node.children || []).map(annotate);
    const subtreeSize = children.length === 0
      ? 1
      : children.reduce((total, child) => total + child.subtreeSize, 0);
    return { ...node, id, children, subtreeSize };
  }

  return annotate(tree);
}
|
|
/**
 * Fetches `data/<family>.json` for the family currently chosen in `select`,
 * stores the annotated tree, rebuilds both filter panels and redraws the
 * tree re-centred.
 *
 * Failures (network error, non-2xx status, invalid JSON, or an exception
 * while rendering) are logged to the console instead of being left as an
 * unhandled rejection. A response that arrives after the user has already
 * picked a different family is ignored, so it cannot overwrite newer data.
 *
 * @returns {Promise<void>} Settles once loading has finished or failed.
 */
function loadLanguageFamily() {
  const family = select.value;
  return fetch(`data/${family}.json`)
    .then(response => {
      if (!response.ok) {
        throw new Error(`Failed to load data/${family}.json (HTTP ${response.status})`);
      }
      return response.json();
    })
    .then(data => {
      // Stale response: the selection changed while this request was in flight.
      if (select.value !== family) return;

      currentTreeData = addNodeIds(data);
      // Only the root (id 0) starts expanded.
      expandedNodes = new Set([0]);
      updateTokenizerFilter(data);
      updateScriptFilter(data);
      drawVisibleNodes(true);
    })
    .catch(error => {
      console.error(`Could not load language family "${family}":`, error);
    });
}
// Load the initially selected family, and reload whenever the selection changes.
loadLanguageFamily();
select.addEventListener('change', loadLanguageFamily);

// Every keystroke in the search box triggers a lookup.
searchBar.addEventListener('input', () => searchNode(searchBar.value));

// Depth limit and the "show non-GlotLID" toggle only need a redraw.
depthSelector.addEventListener('change', () => drawVisibleNodes());
showNonGlotlidCheckbox.addEventListener('click', () => drawVisibleNodes());

// Each "clear" button unchecks every checked box in its filter panel, then redraws.
[
  [clearTokenizersFilter, tokenizerFilterDiv],
  [clearScriptsFilter, scriptFiltersDiv],
].forEach(([button, panel]) => {
  button.addEventListener('click', () => {
    panel.querySelectorAll('input:checked').forEach((box) => {
      box.checked = false;
    });
    drawVisibleNodes();
  });
});
|
|
/**
 * Collects every script name found in `node` and all of its descendants.
 *
 * The default accumulator already contains the placeholder 'x', which the
 * rest of this file uses as the label for nodes that have no scripts.
 * Nodes without a `scripts` or `children` property are tolerated.
 *
 * @param {object|null|undefined} node - Subtree root; falsy values are ignored.
 * @param {Set<string>} [namesSet] - Accumulator; mutated and returned.
 * @returns {Set<string>} The accumulator with all script names added.
 */
function getScriptNames(node, namesSet = new Set(['x'])) {
  if (!node) {
    return namesSet;
  }
  for (const script of node.scripts || []) {
    namesSet.add(script);
  }
  for (const child of node.children || []) {
    getScriptNames(child, namesSet);
  }
  return namesSet;
}
|
|
/**
 * Rebuilds the script filter panel from the scripts found in `data`.
 *
 * Stores the names in `scriptNames`, empties `scriptFiltersDiv`, then adds
 * one pre-checked checkbox (wrapped in a label and followed by a <br>) per
 * name. Toggling any checkbox redraws the tree.
 *
 * @param {object} data - Root of the tree whose scripts are listed.
 */
function updateScriptFilter(data) {
  scriptNames = [...getScriptNames(data)];
  scriptFiltersDiv.innerHTML = '';

  for (const scriptName of scriptNames) {
    const box = Object.assign(document.createElement('input'), {
      type: 'checkbox',
      value: scriptName,
      checked: true,
    });
    // Wrapped so the change event is not passed on as the `recenter` argument.
    box.addEventListener('change', () => drawVisibleNodes());

    const row = document.createElement('label');
    row.appendChild(box);
    row.appendChild(document.createTextNode(scriptName));

    scriptFiltersDiv.appendChild(row);
    scriptFiltersDiv.appendChild(document.createElement('br'));
  }
}
|
|
/**
 * Rebuilds the tokenizer filter panel (which doubles as the colour legend)
 * from the tokenizers found in `data`.
 *
 * Stores the names in `tokenizerNames`, recreates the `color` ordinal scale
 * over them with d3.schemeCategory10, empties `tokenizerFilterDiv`, and adds
 * one row per name: a 10x10 colour swatch, a pre-checked checkbox and the
 * name, followed by a <br>. Toggling any checkbox redraws the tree.
 *
 * @param {object} data - Root of the tree whose tokenizers are listed.
 */
function updateTokenizerFilter(data) {
  tokenizerNames = [...getTokenizerNames(data)];
  color = d3.scaleOrdinal(tokenizerNames, d3.schemeCategory10);
  tokenizerFilterDiv.innerHTML = '';

  for (const tokenizerName of tokenizerNames) {
    const box = Object.assign(document.createElement('input'), {
      type: 'checkbox',
      value: tokenizerName,
      checked: true,
    });
    // Wrapped so the change event is not passed on as the `recenter` argument.
    box.addEventListener('change', () => drawVisibleNodes());

    const row = document.createElement('label');

    // Small square showing the colour this tokenizer is drawn with.
    const swatch = document.createElement('span');
    Object.assign(swatch.style, {
      display: 'inline-block',
      width: '10px',
      height: '10px',
      backgroundColor: color(tokenizerName),
      marginRight: '5px',
    });

    row.appendChild(swatch);
    row.appendChild(box);
    row.appendChild(document.createTextNode(tokenizerName));

    tokenizerFilterDiv.appendChild(row);
    tokenizerFilterDiv.appendChild(document.createElement('br'));
  }
}
|
|
/**
 * Collects the `original_lang_name` of every tokenizer attached to `node`
 * or to any of its descendants.
 *
 * @param {object|null|undefined} node - Subtree root; falsy values are ignored.
 * @param {Set<string>} [namesSet] - Accumulator; mutated and returned. The
 *   default is built from the one-character string 'x', i.e. it starts out
 *   containing just the placeholder 'x'.
 * @returns {Set<string>} The accumulator with all tokenizer names added.
 */
function getTokenizerNames(node, namesSet = new Set('x')) {
  if (node) {
    if (node.tokenizers) {
      // Only the values matter here; the keys of `tokenizers` are ignored.
      Object.values(node.tokenizers).forEach((tokenizer) => {
        namesSet.add(tokenizer.original_lang_name);
      });
    }
    if (node.children) {
      for (const child of node.children) {
        getTokenizerNames(child, namesSet);
      }
    }
  }
  return namesSet;
}
|
|
/**
 * Renders `data` as a vertical D3 tree inside `#tree-container`, replacing
 * any SVG drawn previously, and wires up zoom/pan, the hover box and
 * click-to-expand/collapse.
 *
 * Node shapes:
 *   - no `iso_3_code`                          -> circle sized by `subtreeSize`
 *   - `iso_3_code` and no `native_tokenizers`  -> 10x10 square
 *   - non-empty `native_tokenizers`            -> triangle
 * Fill colour comes from the shared `color` scale, keyed by the first
 * tokenizer of the node that passes both the script and tokenizer filters
 * ('unknown' when none does).
 *
 * View handling: the previous zoom transform is kept unless `recenter` is
 * set; if `nodeToCenter` holds a node id, the view is centred on that node
 * (at scale 1) and `nodeToCenter` is reset to null.
 *
 * Changes compared with the earlier version of this function:
 *   - a leftover `debugger` statement was removed;
 *   - centring now also works for node id 0 (the root), which the former
 *     truthiness test on `nodeToCenter` skipped;
 *   - circles are only appended to the nodes that display one, instead of
 *     adding an attribute-less <circle> to every node;
 *   - the zoom behaviour is attached once instead of twice.
 *
 * NOTE(review): the hover box is filled through innerHTML with values taken
 * from the tree data. This is fine only while the JSON files are trusted;
 * escape the values if that ever changes.
 *
 * @param {object} data - Filtered tree as produced by drawVisibleNodes; nodes
 *   are read for name, id, iso_1_code, iso_3_code, scripts, tokenizers,
 *   native_tokenizers, subtreeSize and children.
 * @param {boolean} [recenter=false] - Reset the view so the origin of the
 *   layout sits in the middle of the window.
 */
function createTree(data, recenter = false) {
  // Redraw from scratch: drop the SVG of the previous render, if any.
  d3.select("#tree-container svg").remove();

  const width = window.innerWidth;
  const height = window.innerHeight;

  const svg = d3.select("#tree-container")
    .append("svg")
    .attr("width", width)
    .attr("height", height);

  // Everything is drawn inside one group so a single transform pans/zooms it.
  const g = svg.append("g");

  const zoom = d3.zoom()
    .scaleExtent([0.1, 5])
    .on("zoom", function(event) {
      // Remember the view so the next redraw can restore it.
      currentTransform = event.transform;
      g.attr("transform", event.transform);
    });

  // Compute node positions.
  const root = d3.hierarchy(data);
  const treeLayout = d3.tree().nodeSize([200, 100]);
  treeLayout(root);

  // Links between parents and children.
  g.selectAll('.link')
    .data(root.links())
    .enter()
    .append('path')
    .attr('class', 'link')
    .attr('d', d3.linkVertical()
      .x(d => d.x)
      .y(d => d.y))
    .attr('stroke', '#ccc')
    .attr('fill', 'none');

  // One group per node, moved to the node's layout position.
  const node = g.selectAll('.node')
    .data(root.descendants())
    .enter()
    .append('g')
    .attr('class', 'node')
    .attr('transform', d => `translate(${d.x},${d.y})`);

  const sizeScale = d3.scaleSqrt()
    .domain([1, root.data.subtreeSize])
    .range([5, 20]);

  const selectedTokenizers = Array.from(tokenizerFilterDiv.querySelectorAll('input:checked'))
    .map(input => input.value);
  const selectedScripts = Array.from(scriptFiltersDiv.querySelectorAll('input:checked'))
    .map(input => input.value);

  // Name of the first tokenizer of `datum` whose script and name are both
  // currently selected, or 'unknown' when there is none.
  function getColorTokenizer(datum) {
    const toks = Object.entries(datum.tokenizers)
      .filter(([script, obj]) => selectedScripts.includes(script) && selectedTokenizers.includes(obj.original_lang_name));
    return toks.length > 0 ? toks[0][1].original_lang_name : 'unknown';
  }
  const fillFor = d => color(getColorTokenizer(d.data));

  // Shapes. Filter first, then append, so each node only gets its own shape.
  node.filter(d => !d.data.iso_3_code)
    .append('circle')
    .attr('r', d => sizeScale(d.data.subtreeSize))
    .attr('fill', fillFor);
  node.filter(d => d.data.iso_3_code && d.data.native_tokenizers.length === 0)
    .append('rect')
    .attr('width', 10)
    .attr('height', 10)
    .attr('x', -5)
    .attr('y', -5)
    .attr('fill', fillFor);
  node.filter(d => d.data.native_tokenizers.length !== 0)
    .append('path')
    .attr('d', d3.symbol().type(d3.symbolTriangle).size(100))
    .attr('fill', fillFor);

  // Label: "<name> - <tokenizer>", plus "(<subtreeSize>)" for nodes without an ISO 639-3 code.
  node.append('text')
    .attr('dy', 4)
    .attr('x', d => sizeScale(d.data.subtreeSize) + 4)
    .attr('text-anchor', 'start')
    .text(d => {
      const tokenizerName = getColorTokenizer(d.data);
      return `${d.data.name} - ${tokenizerName || 'x'}${d.data.iso_3_code ? '' : ' (' + d.data.subtreeSize + ')'}`
    });

  if (!currentTransform || recenter) {
    currentTransform = d3.zoomIdentity.translate(width / 2, height / 2);
  }

  // Compare against null explicitly: 0 is a valid node id (the root).
  if (nodeToCenter !== null) {
    const target = root.descendants().find(d => d.data.id === nodeToCenter);
    if (target) {
      const scale = 1.0;
      currentTransform = d3.zoomIdentity
        .translate(width / 2 - target.x, height / 2 - target.y)
        .scale(scale);
    }
    // Centring is one-shot, whether or not the node was visible.
    nodeToCenter = null;
  }

  // Attach the zoom behaviour and apply the (restored or new) view.
  svg.call(zoom).call(zoom.transform, currentTransform);

  // Hover box with the node's details; it follows the pointer.
  node.on("mouseover", function(event, d) {
    hoverBox.style.display = "block";
    hoverBox.style.left = (event.pageX) + "px";
    hoverBox.style.top = (event.pageY) + "px";
    const tokenizersList = Object.keys(d.data.tokenizers).map((script) =>
      `<li><em>${script}</em>: ${d.data.tokenizers[script]['class_name']}-${d.data.tokenizers[script]['original_lang_name']}${d.data.native_tokenizers.includes(script) ? '(👤)' :'(🤖)'}</li>`).join('')
    hoverBox.innerHTML = `
<strong>Name:</strong> ${d.data.name}<br>
${d.data.iso_1_code ? '<strong>ISO 1 Code:</strong> ' + d.data.iso_1_code + '<br>' : ''}
${d.data.iso_3_code ? '<strong>ISO 3 Code:</strong> ' + d.data.iso_3_code + '<br>' : ''}
${d.data.scripts.length > 0 ? '<strong>Scripts:</strong> ' + d.data.scripts.join(', ') + '<br>' : ''}
${d.data.iso_3_code ? '<strong>In GlotLID:</strong> ' + (d.data.scripts.length > 0 ? 'YES' : 'NO') + '<br>' : ''}

<strong>Tokenizers:</strong><ul>${tokenizersList}</ul>
<strong>Subtree size:</strong> ${d.data.subtreeSize}
`;
  }).on("mousemove", function(event) {
    hoverBox.style.left = (event.pageX) + "px";
    hoverBox.style.top = (event.pageY) + "px";
  }).on("mouseout", function() {
    hoverBox.style.display = "none";
  });

  // Clicking a node toggles its expanded state and redraws.
  node.on('click', function(event, d) {
    setExpanded(d.data.id, !expandedNodes.has(d.data.id));
    hoverBox.style.display = "none";
    drawVisibleNodes();
  });
}
|
|
/**
 * Prunes `currentTreeData` down to the nodes that should currently be
 * visible and hands the result to `createTree`. Does nothing until a tree
 * has been loaded.
 *
 * A node is dropped when
 *   - it has an `iso_3_code`, no scripts, and "show non-GlotLID" is off; or
 *   - its parent is collapsed and it lies deeper than the depth limit
 *     (a limit equal to 0 means "no limit"); or
 *   - its parent is collapsed, none of its children survived, it is not a
 *     childless node matching the tokenizer/script filters, and it is not
 *     the node with id 0.
 * Children of an expanded parent are otherwise always kept.
 *
 * @param {boolean} [recenter=false] - Forwarded to `createTree`.
 */
function drawVisibleNodes(recenter = false) {
  if (!currentTreeData) return;

  const checkedValues = (panel) =>
    Array.from(panel.querySelectorAll('input:checked')).map((input) => input.value);

  const selectedTokenizers = checkedValues(tokenizerFilterDiv);
  const selectedScripts = checkedValues(scriptFiltersDiv);
  const showNonGlotlid = showNonGlotlidCheckbox.checked;

  // Tokenizer names of a node; the placeholder 'x' stands for "none".
  const tokenizerLabels = (node) => {
    const labels = Object.values(node.tokenizers).map((x) => x.original_lang_name);
    return labels.length > 0 ? labels : ['x'];
  };

  // Scripts of a node; the placeholder 'x' stands for "none".
  const scriptLabels = (node) => (node.scripts.length ? node.scripts : ['x']);

  // True when the node matches a selected tokenizer AND a selected script.
  // A script-less node can only match while "show non-GlotLID" is on.
  function matchesFilters(node) {
    const tokenizerHit = selectedTokenizers.some((tok) => tokenizerLabels(node).includes(tok));
    if (!tokenizerHit) {
      return false;
    }
    return selectedScripts.some(
      (scr) => scriptLabels(node).includes(scr) && (node.scripts.length > 0 || showNonGlotlid)
    );
  }

  function filterHierarchy(node, parentExpanded = true, depth = 0) {
    if (node.iso_3_code && node.scripts.length === 0 && !showNonGlotlid) {
      return null;
    }
    // The loose comparisons are deliberate: `depthSelector.value` is compared
    // as-is (a string for form controls) against numbers.
    if (!parentExpanded && depthSelector.value != 0 && !(depth <= depthSelector.value)) {
      return null;
    }

    const keptChildren = [];
    for (const child of node.children || []) {
      const kept = filterHierarchy(child, expandedNodes.has(node.id), depth + 1);
      if (kept !== null) {
        keptChildren.push(kept);
      }
    }

    const shouldBeShown = matchesFilters(node);

    const keep =
      parentExpanded ||
      keptChildren.length > 0 ||
      (node.children.length === 0 && shouldBeShown) ||
      node.id == 0;

    return keep ? { ...node, children: keptChildren } : null;
  }

  createTree(filterHierarchy(currentTreeData), recenter);
}
|
|
/**
 * Looks up the first node (depth-first, parents before children) whose
 * lower-cased name, `iso_3_code` or `iso_1_code` equals the lower-cased
 * query. On a hit, every ancestor of the node is expanded, the node is
 * scheduled for centring via `nodeToCenter`, and the tree is redrawn.
 *
 * Nothing happens while no tree is loaded, for queries shorter than two
 * characters, or when no node matches.
 *
 * @param {string} name - Search text typed by the user.
 */
function searchNode(name) {
  const query = name.toLowerCase();
  if (!currentTreeData || query.length < 2) return;

  const isMatch = (node) =>
    node.name.toLowerCase() === query ||
    node.iso_3_code === query ||
    node.iso_1_code === query;

  // Returns the chain of nodes from `node` down to the first match, or null.
  function findPath(node) {
    if (isMatch(node)) {
      return [node];
    }
    for (const child of node.children || []) {
      const tail = findPath(child);
      if (tail) {
        return [node, ...tail];
      }
    }
    return null;
  }

  const path = findPath(currentTreeData);
  if (!path) return;

  // Expand the ancestors, nearest parent first; the match itself is left as is.
  for (let i = path.length - 2; i >= 0; i--) {
    setExpanded(path[i].id);
  }

  nodeToCenter = path[path.length - 1].id;
  drawVisibleNodes();
}
| }); |
|
|