diff --git a/src/App.tsx b/src/App.tsx
index cf78e32..1a87715 100644
--- a/src/App.tsx
+++ b/src/App.tsx
@@ -1,6 +1,8 @@
 import { useEffect, useState } from 'react';
 import Editor from './components/Editor';
 import { loadFromBytebin } from './util/storage';
+import { Language } from './util/language';
+import { detectLanguage } from './util/detect-language';
 
 const INITIAL = Symbol();
 const LOADING = Symbol();
@@ -13,7 +15,7 @@ export default function App() {
   const [state, setState] = useState(INITIAL);
   const [forcedContent, setForcedContent] = useState('');
   const [actualContent, setActualContent] = useState('');
-  const [contentType, setContentType] = useState<string>();
+  const [contentType, setContentType] = useState<Language>();
 
   function setContent(content: string) {
     setActualContent(content);
@@ -28,8 +30,14 @@ export default function App() {
       loadFromBytebin(pasteId).then(({ ok, content, type }) => {
         if (ok) {
           setContent(content);
-          if (type) {
+          if (type !== 'plain') {
             setContentType(type);
+          } else {
+            detectLanguage(pasteId).then(detectedLanguage => {
+              if (detectedLanguage) {
+                setContentType(detectedLanguage);
+              }
+            });
           }
         } else {
           setContent(get404Message(pasteId));
diff --git a/src/components/Editor.tsx b/src/components/Editor.tsx
index 74cea47..b9657b4 100644
--- a/src/components/Editor.tsx
+++ b/src/components/Editor.tsx
@@ -7,12 +7,13 @@ import themes, { Themes } from '../style/themes';
 import EditorControls from './EditorControls';
 import EditorGlobalStyle from './EditorGlobalStyle';
 import EditorTextArea from './EditorTextArea';
+import { Language } from '../util/language';
 
 export interface EditorProps {
   forcedContent: string;
   actualContent: string;
   setActualContent: (value: string) => void;
-  contentType?: string;
+  contentType?: Language;
   pasteId?: string;
 }
 
diff --git a/src/components/EditorControls.tsx b/src/components/EditorControls.tsx
index d95e0f6..81fb77e 100644
--- a/src/components/EditorControls.tsx
+++ b/src/components/EditorControls.tsx
@@ -4,7 +4,7 @@
 import { MutableRefObject, useCallback, useEffect, useState } from 'react';
 import styled from 'styled-components';
 import themes, { Themes } from '../style/themes';
-import { languages } from '../util/highlighting';
+import { languages, unknownLanguage } from '../util/language';
 import { saveToBytebin } from '../util/storage';
 import Button from './Button';
 import { ResetFunction } from './Editor';
@@ -104,9 +104,9 @@ export default function EditorControls({
 } /> {readOnly && }
diff --git a/src/util/constants.ts b/src/util/constants.ts
index 87dfa5b..6a9adca 100644
--- a/src/util/constants.ts
+++ b/src/util/constants.ts
@@ -2,3 +2,7 @@ export const bytebinUrl =
   process.env.REACT_APP_BYTEBIN_URL || 'https://bytebin.lucko.me/';
 
 export const postUrl = bytebinUrl + 'post';
+
+export const languageDetectionUrl =
+  process.env.REACT_APP_LANG_DETECT_URL ||
+  'https://language-detection-service.pastes.dev/';
diff --git a/src/util/detect-language.ts b/src/util/detect-language.ts
new file mode 100644
index 0000000..cf2911a
--- /dev/null
+++ b/src/util/detect-language.ts
@@ -0,0 +1,74 @@
+import { languageDetectionUrl } from './constants';
+import { Language } from './language';
+
+/** A single guess returned by the language detection service. */
+interface DetectedLanguage {
+  languageId: string;
+  confidence: number;
+}
+
+/** Guesses are only accepted when their confidence exceeds this. */
+const minConfidence = 0.5;
+
+/**
+ * Asks the language detection service to guess the language of a paste.
+ *
+ * Detection is best-effort: a failed request, a non-OK response or a
+ * response body that is not an array all result in `null`.
+ *
+ * @param id - id of the paste, appended to `languageDetectionUrl`
+ * @returns the language for the first guess that is confident enough and
+ *   has an entry in `lookup`, or `null` if there is no such guess
+ */
+export async function detectLanguage(id: string): Promise<Language | null> {
+  try {
+    const resp = await fetch(languageDetectionUrl + id);
+    if (!resp.ok) {
+      return null;
+    }
+
+    const results: unknown = await resp.json();
+    if (!Array.isArray(results)) {
+      return null;
+    }
+
+    for (const { languageId, confidence } of results as DetectedLanguage[]) {
+      const language = lookup.get(languageId);
+      if (confidence > minConfidence && language) {
+        return language;
+      }
+    }
+  } catch (e) {
+    // detection is optional: report the failure and fall through to null
+    console.warn('Language detection failed', e);
+  }
+  return null;
+}
+
+/** Maps language ids reported by the detection service to a `Language`. */
+const lookup = new Map<string, Language>([
+  ['ini', 'log'], // the model seems to confidently guess log files as ini - log is the more likely option
+  ['yaml', 'yaml'],
+  ['md', 'markdown'],
+  ['rb', 'ruby'],
+  ['kt', 'kotlin'],
+  ['xml', 'xml'],
+  ['js', 'javascript'],
+  ['html', 'html'],
+  ['ts', 'typescript'],
+  ['json', 'json'],
+  ['php', 'php'],
+  ['py', 'python'],
+  ['rs', 'rust'],
+  ['sql', 'sql'],
+  ['sh', 'shell'],
+  ['cpp', 'cpp'],
+  ['go', 'go'],
+  ['scala', 'scala'],
+  ['dockerfile', 'dockerfile'],
+  ['java', 'java'],
+  ['cs', 'csharp'],
+  ['css', 'css'],
+  ['groovy', 'java'], // Language has no groovy entry - highlighted as java
+]);
+// missing: csv, ml, ex, pas, bat, lua, v, jl, pm, prolog, matlab, clj, f90, c, tex, coffee, ps1, hs, mm, cmake, erl, dm, dart, asm, makefile, r, swift, lisp, vba, toml, cbl
diff --git a/src/util/highlighting.ts b/src/util/highlighting.ts
deleted file mode 100644
index c6a3879..0000000
--- a/src/util/highlighting.ts
+++ /dev/null
@@ -1,23 +0,0 @@
-export const languages = {
-  text: ['plain', 'log'],
-  config: ['yaml', 'json', 'xml', 'ini'],
-  code: [
-    'java',
-    'javascript',
-    'typescript',
-    'python',
-    'kotlin',
-    'scala',
-    'cpp',
-    'csharp',
-    'shell',
-    'ruby',
-    'rust',
-    'sql',
-    'go',
-  ],
-  web: ['html', 'css', 'scss', 'php', 'graphql'],
-  misc: ['dockerfile', 'markdown', 'proto'],
-};
-
-export const languageIds = Object.values(languages).flat(1);
diff --git a/src/util/language.ts b/src/util/language.ts
new file mode 100644
index 0000000..fc616cc
--- /dev/null
+++ b/src/util/language.ts
@@ -0,0 +1,72 @@
+export type Language =
+  | 'plain'
+  | 'plaintext'
+  | 'log'
+  | 'yaml'
+  | 'json'
+  | 'xml'
+  | 'ini'
+  | 'java'
+  | 'javascript'
+  | 'typescript'
+  | 'python'
+  | 'kotlin'
+  | 'scala'
+  | 'cpp'
+  | 'csharp'
+  | 'shell'
+  | 'ruby'
+  | 'rust'
+  | 'sql'
+  | 'go'
+  | 'html'
+  | 'css'
+  | 'scss'
+  | 'php'
+  | 'graphql'
+  | 'dockerfile'
+  | 'markdown'
+  | 'proto';
+
+export const unknownLanguage: Language & 'plain' = 'plain';
+
+export interface Languages {
+  text: Language[];
+  config: Language[];
+  code: Language[];
+  web: Language[];
+  misc: Language[];
+}
+
+export const languages: Languages = {
+  text: ['plaintext', 'log'],
+  config: ['yaml', 'json', 'xml', 'ini'],
+  code: [
+    'java',
+    'javascript',
+    'typescript',
+    'python',
+    'kotlin',
+    'scala',
+    'cpp',
+    'csharp',
+    'shell',
+    'ruby',
+    'rust',
+    'sql',
+    'go',
+  ],
+  web: ['html', 'css', 'scss', 'php', 'graphql'],
+  misc: ['dockerfile', 'markdown', 'proto'],
+};
+
+/** Every id listed in `languages`, flattened, followed by `unknownLanguage`. */
+export const languageIds: Language[] = [
+  ...Object.values(languages).flat(1),
+  unknownLanguage,
+];
+
+/** Type guard: true when `lang` is one of the entries in `languageIds`. */
+export function isLanguage(lang: string): lang is Language {
+  return languageIds.includes(lang as Language);
+}
diff --git a/src/util/storage.ts b/src/util/storage.ts
index bae6508..b24d3d7 100644
--- a/src/util/storage.ts
+++ b/src/util/storage.ts
@@ -1,12 +1,12 @@
 import { gzip } from 'pako';
 import MIMEType from 'whatwg-mimetype';
 import { bytebinUrl, postUrl } from './constants';
-import { languageIds } from './highlighting';
+import { isLanguage, Language } from './language';
 
 interface LoadResultSuccess {
   ok: true;
   content: string;
-  type?: string;
+  type?: Language;
 }
 
 interface LoadResultFail {
@@ -64,13 +64,38 @@ export async function saveToBytebin(
   return null;
 }
 
-export function contentTypeToLanguage(contentType: string) {
-  const { type, subtype: subType } = new MIMEType(contentType);
+/**
+ * Maps a Content-Type value to one of the supported languages.
+ *
+ * `application/json` maps to `json`. For the `text` type the subtype is used,
+ * ignoring case and a leading `x-` prefix (so `text/x-java` maps to `java`).
+ *
+ * @param contentType - the raw Content-Type value
+ * @returns the matching language, or `undefined` if the value cannot be
+ *   parsed or does not correspond to a known language
+ */
+export function contentTypeToLanguage(
+  contentType: string
+): Language | undefined {
+  // parse() returns null for malformed input, where the constructor throws
+  const mimeType = MIMEType.parse(contentType);
+  if (!mimeType) {
+    return undefined;
+  }
+
+  const { type, subtype: subType } = mimeType;
   if (type === 'application' && subType === 'json') {
     return 'json';
   }
-  if (type === 'text' && languageIds.includes(subType.toLowerCase())) {
-    return subType.toLowerCase();
+
+  let subTypeLower = subType.toLowerCase();
+  if (subTypeLower.startsWith('x-')) {
+    subTypeLower = subTypeLower.substring(2);
+  }
+
+  if (type === 'text' && isLanguage(subTypeLower)) {
+    return subTypeLower;
   }
+  return undefined;
 }
 