Finally fully separated Span's internals from code
VonTum committed Feb 20, 2024
1 parent 610dc1b commit f4c41cb
Showing 7 changed files with 150 additions and 141 deletions.
43 changes: 17 additions & 26 deletions src/dev_aid/lsp.rs
@@ -7,16 +7,7 @@ use lsp_server::{Connection, Message, Response};
use lsp_types::notification::Notification;

use crate::{
arena_alloc::ArenaVector,
file_position::Span,
ast::{IdentifierType, Module},
dev_aid::syntax_highlighting::create_token_ide_info,
errors::{CompileError, ErrorCollector, ErrorLevel},
flattening::FlatID,
instantiation::{SubModuleOrWire, CALCULATE_LATENCY_LATER},
linker::{FileData, FileUUID, FileUUIDMarker, Linker, LocationInfo},
parser::perform_full_semantic_parse,
tokenizer::{CharLine, TokenizeResult}
arena_alloc::ArenaVector, ast::{IdentifierType, Module}, dev_aid::syntax_highlighting::create_token_ide_info, errors::{CompileError, ErrorCollector, ErrorLevel}, file_position::{CharLine, FileText, Span}, flattening::FlatID, instantiation::{SubModuleOrWire, CALCULATE_LATENCY_LATER}, linker::{FileData, FileUUID, FileUUIDMarker, Linker, LocationInfo}, parser::perform_full_semantic_parse
};

use super::syntax_highlighting::{IDETokenType, IDEIdentifierType, IDEToken};
@@ -187,7 +178,7 @@ fn do_syntax_highlight(file_data : &FileData, linker : &Linker) -> Vec<SemanticT
let typ = get_semantic_token_type_from_ide_token(ide_tok);
let mod_bits = get_modifiers_for_token(ide_tok);

let tok_range = file_data.tokens.get_token_linechar_range(tok_idx);
let tok_range = file_data.file_text.get_token_linechar_range(tok_idx);
let start_pos = to_position(tok_range.start);
let end_pos = to_position(tok_range.end);

@@ -214,40 +205,40 @@

use lsp_types::Diagnostic;

fn cvt_span_to_lsp_range(ch_sp : Span, tokens : &TokenizeResult) -> lsp_types::Range {
let rng = tokens.get_span_linechar_range(ch_sp);
fn cvt_span_to_lsp_range(ch_sp : Span, file_text : &FileText) -> lsp_types::Range {
let rng = file_text.get_span_linechar_range(ch_sp);
Range {
start: Position{character : rng.start.character as u32, line : rng.start.line as u32},
end: Position{character : rng.end.character as u32, line : rng.end.line as u32}
}
}

// Requires that token_positions.len() == tokens.len() + 1 to include EOF token
fn convert_diagnostic(err : CompileError, main_tokens : &TokenizeResult, linker : &Linker, uris : &ArenaVector<Url, FileUUIDMarker>) -> Diagnostic {
assert!(main_tokens.is_span_valid(err.position), "bad error: {}", err.reason);
let error_pos = cvt_span_to_lsp_range(err.position, main_tokens);
fn convert_diagnostic(err : CompileError, main_file_text : &FileText, linker : &Linker, uris : &ArenaVector<Url, FileUUIDMarker>) -> Diagnostic {
assert!(main_file_text.is_span_valid(err.position), "bad error: {}", err.reason);
let error_pos = cvt_span_to_lsp_range(err.position, main_file_text);

let severity = match err.level {
ErrorLevel::Error => DiagnosticSeverity::ERROR,
ErrorLevel::Warning => DiagnosticSeverity::WARNING,
};
let mut related_info = Vec::new();
for info in err.infos {
let info_tokens = &linker.files[info.file].tokens;
assert!(info_tokens.is_span_valid(info.position), "bad info: {}; in err: {}", info.info, err.reason);
let info_pos = cvt_span_to_lsp_range(info.position, info_tokens);
let info_file_text = &linker.files[info.file].file_text;
assert!(info_file_text.is_span_valid(info.position), "bad info: {}; in err: {}", info.info, err.reason);
let info_pos = cvt_span_to_lsp_range(info.position, info_file_text);
let location = Location{uri : uris[info.file].clone(), range : info_pos};
related_info.push(DiagnosticRelatedInformation { location, message: info.info });
}
Diagnostic::new(error_pos, Some(severity), None, None, err.reason, Some(related_info), None)
}

// Requires that token_positions.len() == tokens.len() + 1 to include EOF token
fn send_errors_warnings(connection: &Connection, errors : ErrorCollector, main_tokens : &TokenizeResult, linker : &Linker, uris : &ArenaVector<Url, FileUUIDMarker>) -> Result<(), Box<dyn Error + Sync + Send>> {
fn send_errors_warnings(connection: &Connection, errors : ErrorCollector, main_file_text : &FileText, linker : &Linker, uris : &ArenaVector<Url, FileUUIDMarker>) -> Result<(), Box<dyn Error + Sync + Send>> {
let mut diag_vec : Vec<Diagnostic> = Vec::new();
let (err_vec, file) = errors.get();
for err in err_vec {
diag_vec.push(convert_diagnostic(err, main_tokens, linker, uris));
diag_vec.push(convert_diagnostic(err, main_file_text, linker, uris));
}

let params = &PublishDiagnosticsParams{
@@ -270,12 +261,12 @@ fn get_hover_info<'l>(file_cache : &'l LoadedFileCache, text_pos : &lsp_types::T

let file_data = &file_cache.linker.files[uuid];

let token_idx = file_data.tokens.get_token_on_or_left_of(from_position(text_pos.position));
let token_idx = file_data.file_text.get_token_on_or_left_of(from_position(text_pos.position));

let (info, span) = file_cache.linker.get_info_about_source_location(token_idx, uuid)?;
//let span = Span::new_single_token(token_idx);

let char_line_range = file_data.tokens.get_span_linechar_range(span);
let char_line_range = file_data.file_text.get_span_linechar_range(span);
Some((info, to_position_range(char_line_range)))
}

@@ -284,7 +275,7 @@ fn push_all_errors(connection: &Connection, file_cache : &LoadedFileCache) -> Re
let errors = file_cache.linker.get_all_errors_in_file(uuid);

// println!("Errors: {:?}", &errors);
send_errors_warnings(&connection, errors, &file_data.tokens, &file_cache.linker, &file_cache.uris)?;
send_errors_warnings(&connection, errors, &file_data.file_text, &file_cache.linker, &file_cache.uris)?;
}
Ok(())
}
@@ -392,7 +383,7 @@ fn handle_request(method : &str, params : serde_json::Value, file_cache : &mut L
LocationInfo::WireRef(md, decl_id) => {
let uri = file_cache.uris[md.link_info.file].clone();
let decl = md.flattened.instructions[decl_id].extract_wire_declaration();
let range = to_position_range(file_cache.linker.files[md.link_info.file].tokens.get_token_linechar_range(decl.name_token));
let range = to_position_range(file_cache.linker.files[md.link_info.file].file_text.get_token_linechar_range(decl.name_token));
GotoDefinitionResponse::Scalar(Location{uri, range})
}
LocationInfo::Temporary(_, _, _) => {
Expand All @@ -404,7 +395,7 @@ fn handle_request(method : &str, params : serde_json::Value, file_cache : &mut L
LocationInfo::Global(id) => {
if let Some(link_info) = file_cache.linker.get_link_info(id) {
let uri = file_cache.uris[link_info.file].clone();
let range = to_position_range(file_cache.linker.files[link_info.file].tokens.get_span_linechar_range(link_info.name_span));
let range = to_position_range(file_cache.linker.files[link_info.file].file_text.get_span_linechar_range(link_info.name_span));
GotoDefinitionResponse::Scalar(Location{uri, range})
} else {
GotoDefinitionResponse::Array(Vec::new())
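Side note on the LSP plumbing above: `to_position`/`from_position` are used in this file but not touched by the diff; they presumably just bridge the crate's zero-based line/character positions and `lsp_types::Position`, as suggested by the field copies in `cvt_span_to_lsp_range`. A hypothetical, self-contained sketch of that mapping (with a stand-in struct instead of the real `lsp_types::Position`, since the helpers' bodies are not shown here):

```rust
// Hypothetical sketch of the CharLine <-> LSP Position mapping assumed above.
// LspPosition stands in for lsp_types::Position (both fields are u32 and
// zero-based in the real type); the real to_position/from_position helpers
// are not part of this diff, so their exact bodies are an assumption.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct CharLine { line: usize, character: usize }

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct LspPosition { line: u32, character: u32 }

fn to_position(cl: CharLine) -> LspPosition {
    LspPosition { line: cl.line as u32, character: cl.character as u32 }
}

fn from_position(p: LspPosition) -> CharLine {
    CharLine { line: p.line as usize, character: p.character as usize }
}

fn main() {
    let cl = CharLine { line: 12, character: 4 };
    // The two conversions are inverses of each other for in-range values.
    assert_eq!(from_position(to_position(cl)), cl);
    println!("{:?} <-> {:?}", cl, to_position(cl));
}
```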
42 changes: 21 additions & 21 deletions src/dev_aid/syntax_highlighting.rs
@@ -1,7 +1,7 @@

use std::{ops::Range, path::PathBuf};

use crate::{arena_alloc::ArenaVector, ast::*, file_position::Span, flattening::{Instruction, WireSource}, linker::{FileData, FileUUID, FileUUIDMarker, Linker, NameElem}, parser::*, tokenizer::*};
use crate::{arena_alloc::ArenaVector, ast::*, file_position::{FileText, Span}, flattening::{Instruction, WireSource}, linker::{FileData, FileUUID, FileUUIDMarker, Linker, NameElem}, parser::*, tokenizer::*};

use ariadne::FileCache;
use console::Style;
@@ -44,21 +44,21 @@ fn pretty_print_chunk_with_whitespace(whitespace_start : usize, file_text : &str
print!("{}{}", whitespace_text, st.apply_to(&file_text[text_span]));
}

fn print_tokens(file_text : &str, tokens : &TokenizeResult) {
fn print_tokens(file_text : &FileText) {
let mut whitespace_start : usize = 0;
for tok_idx in 0..tokens.len() {
for tok_idx in 0..file_text.num_tokens() {
let styles = [Style::new().magenta(), Style::new().yellow(), Style::new().blue()];
let st = styles[tok_idx % styles.len()].clone().underlined();

let token_range = tokens.get_token_range(tok_idx);
pretty_print_chunk_with_whitespace(whitespace_start, file_text, token_range.clone(), st);
let token_range = file_text.get_token_range(tok_idx);
pretty_print_chunk_with_whitespace(whitespace_start, &file_text.file_text, token_range.clone(), st);
whitespace_start = token_range.end;
}

print!("{}\n", &file_text[whitespace_start..file_text.len()]);
print!("{}\n", &file_text.file_text[whitespace_start..]);
}

fn pretty_print(file_text : &str, tokens : &TokenizeResult, ide_infos : &[IDEToken]) {
fn pretty_print(file_text : &FileText, ide_infos : &[IDEToken]) {
let mut whitespace_start : usize = 0;

for (tok_idx, token) in ide_infos.iter().enumerate() {
@@ -83,12 +83,12 @@ fn pretty_print(file_text : &str, tokens : &TokenizeResult, ide_infos : &[IDETok
}
};

let tok_span = tokens.get_token_range(tok_idx);
pretty_print_chunk_with_whitespace(whitespace_start, file_text, tok_span.clone(), st);
let tok_span = file_text.get_token_range(tok_idx);
pretty_print_chunk_with_whitespace(whitespace_start, &file_text.file_text, tok_span.clone(), st);
whitespace_start = tok_span.end;
}

print!("{}\n", &file_text[whitespace_start..file_text.len()]);
print!("{}\n", &file_text.file_text[whitespace_start..]);
}

fn add_ide_bracket_depths_recursive<'a>(result : &mut [IDEToken], current_depth : usize, token_hierarchy : &[TokenTreeNode]) {
@@ -164,7 +164,7 @@ pub fn create_token_ide_info<'a>(parsed: &FileData, linker : &Linker) -> Vec<IDE
let mut result : Vec<IDEToken> = Vec::new();
result.reserve(parsed.tokens.len());

for &tok_typ in &parsed.tokens.token_types {
for &tok_typ in &parsed.tokens {
let initial_typ = if is_keyword(tok_typ) {
IDETokenType::Keyword
} else if is_bracket(tok_typ) != IsBracket::NotABracket {
@@ -193,27 +193,27 @@ pub fn create_token_ide_info<'a>(parsed: &FileData, linker : &Linker) -> Vec<IDE
}

// Outputs character_offsets.len() == tokens.len() + 1 to include EOF token
fn generate_character_offsets(file_text : &str, tokens : &TokenizeResult) -> Vec<Range<usize>> {
fn generate_character_offsets(file_text : &FileText) -> Vec<Range<usize>> {
let mut character_offsets : Vec<Range<usize>> = Vec::new();
character_offsets.reserve(tokens.len());
character_offsets.reserve(file_text.num_tokens());

let mut cur_char = 0;
let mut whitespace_start = 0;
for tok_idx in 0..tokens.len() {
let tok_range = tokens.get_token_range(tok_idx);
for tok_idx in 0..file_text.num_tokens() {
let tok_range = file_text.get_token_range(tok_idx);

// whitespace
cur_char += file_text[whitespace_start..tok_range.start].chars().count();
cur_char += file_text.file_text[whitespace_start..tok_range.start].chars().count();
let token_start_char = cur_char;

// actual text
cur_char += file_text[tok_range.clone()].chars().count();
cur_char += file_text.file_text[tok_range.clone()].chars().count();
character_offsets.push(token_start_char..cur_char);
whitespace_start = tok_range.end;
}

// Final char offset for EOF
let num_chars_in_file = cur_char + file_text[whitespace_start..].chars().count();
let num_chars_in_file = cur_char + file_text.file_text[whitespace_start..].chars().count();
character_offsets.push(cur_char..num_chars_in_file);

character_offsets
@@ -249,7 +249,7 @@ pub fn print_all_errors(linker : &Linker, paths_arena : &ArenaVector<PathBuf, Fi
let mut file_cache : FileCache = Default::default();

for (file_uuid, f) in &linker.files {
let token_offsets = generate_character_offsets(&f.file_text, &f.tokens);
let token_offsets = generate_character_offsets(&f.file_text);

let errors = linker.get_all_errors_in_file(file_uuid);

@@ -263,9 +263,9 @@ pub fn syntax_highlight_file(linker : &Linker, file_uuid : FileUUID, settings :
let f = &linker.files[file_uuid];

if settings.show_tokens {
print_tokens(&f.file_text, &f.tokens);
print_tokens(&f.file_text);
}

let ide_tokens = create_token_ide_info(f, linker);
pretty_print(&f.file_text, &f.tokens, &ide_tokens);
pretty_print(&f.file_text, &ide_tokens);
}
91 changes: 83 additions & 8 deletions src/file_position.rs
@@ -1,12 +1,8 @@
use std::ops::Range;

use crate::tokenizer::TokenizeResult;



// Token span. Indices are INCLUSIVE
#[derive(Clone,Copy,Debug,PartialEq,Eq,Hash)]
pub struct Span(pub usize, pub usize);
pub struct Span(usize, usize);

impl Span {
pub const MAX_POSSIBLE_SPAN : Span = Span(0, usize::MAX);
Expand Down Expand Up @@ -40,9 +36,6 @@ impl Span {
assert!(start_tok <= end_tok);
Span(start_tok, end_tok)
}
pub fn whole_file_span(tokens : &TokenizeResult) -> Span {
Span(0, tokens.token_types.len())
}
pub fn contains_token(&self, token_idx : usize) -> bool {
token_idx >= self.0 && token_idx <= self.1
}
@@ -102,3 +95,85 @@ impl BracketSpan {
}



#[derive(Clone, Copy, PartialEq, Eq)]
pub struct CharLine {
pub line : usize,
pub character : usize
}
impl PartialOrd for CharLine {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for CharLine {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.line.cmp(&other.line).then(self.character.cmp(&other.character))
}
}




pub struct FileText {
pub file_text : String,
// List of all boundaries. Starts with 0, in whitespace mode, and then alternatingly switch to being a token, switch to being whitespace, back and forth
// The span of token i is given by token_boundaries[i*2+1..i*2+2]
// Ends at the end of the file, with a final whitespace block
token_boundaries : Vec<usize>,
token_boundaries_as_char_lines : Vec<CharLine>
}

impl FileText {
pub fn new(file_text : String, token_boundaries : Vec<usize>) -> Self {
let mut cur_position = CharLine{line: 0, character: 0};
let mut start = 0;
let token_boundaries_as_char_lines = token_boundaries.iter().map(|part_end| {
for c in file_text[start..*part_end].chars() {
if c == '\n' {
cur_position.line += 1;
cur_position.character = 0;
} else {
cur_position.character += 1;
}
}
start = *part_end;
cur_position
}).collect();

FileText{file_text, token_boundaries, token_boundaries_as_char_lines}
}

pub fn num_tokens(&self) -> usize {
(self.token_boundaries.len() - 2) / 2
}
pub fn get_token_range(&self, token_idx : usize) -> Range<usize> {
self.token_boundaries[token_idx*2+1]..self.token_boundaries[token_idx*2+2]
}
pub fn get_token_linechar_range(&self, token_idx : usize) -> Range<CharLine> {
self.token_boundaries_as_char_lines[token_idx*2+1]..self.token_boundaries_as_char_lines[token_idx*2+2]
}
pub fn get_span_range(&self, span : Span) -> Range<usize> {
self.token_boundaries[span.0*2+1]..self.token_boundaries[span.1*2+2]
}
pub fn get_span_linechar_range(&self, span : Span) -> Range<CharLine> {
self.token_boundaries_as_char_lines[span.0*2+1]..self.token_boundaries_as_char_lines[span.1*2+2]
}

pub fn get_token_on_or_left_of(&self, char_line : CharLine) -> usize {
match self.token_boundaries_as_char_lines.binary_search(&char_line) {
Ok(idx) | Err(idx) => {
assert!(idx >= 1);
return (idx - 1) / 2;
}
}
}

pub fn whole_file_span(&self) -> Span {
Span(0, self.num_tokens() - 1)
}

pub fn is_span_valid(&self, span : Span) -> bool {
span.1 < self.num_tokens()
}
}
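The alternating whitespace/token layout of `token_boundaries` is easy to misindex, so here is a small, self-contained illustration (hypothetical input, not code from this commit) of how `num_tokens` and `get_token_range` read it:

```rust
// Hypothetical illustration of the token_boundaries layout described in FileText.
// For the text "ab  cd" with two tokens "ab" and "cd", the boundary list is:
//   [0, 0, 2, 4, 6, 6]
//    |  |  |  |  |  `- end of the trailing whitespace block (end of file)
//    |  |  |  |  `---- end of token 1 ("cd")
//    |  |  |  `------- start of token 1 (after the two-space gap)
//    |  |  `---------- end of token 0 ("ab")
//    |  `------------- start of token 0
//    `---------------- start of the leading whitespace block (always 0)
fn main() {
    let file_text = "ab  cd";
    let token_boundaries: Vec<usize> = vec![0, 0, 2, 4, 6, 6];

    // Mirrors FileText::num_tokens: drop the leading 0 and the final
    // end-of-file boundary, then two boundaries remain per token.
    let num_tokens = (token_boundaries.len() - 2) / 2;
    assert_eq!(num_tokens, 2);

    for token_idx in 0..num_tokens {
        // Mirrors FileText::get_token_range:
        // token i spans token_boundaries[i*2+1]..token_boundaries[i*2+2].
        let range = token_boundaries[token_idx * 2 + 1]..token_boundaries[token_idx * 2 + 2];
        println!("token {}: {:?} -> {:?}", token_idx, range.clone(), &file_text[range]);
    }
}
```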
2 changes: 1 addition & 1 deletion src/flattening/mod.rs
@@ -281,7 +281,7 @@ impl<'prev, 'inst, 'l, 'runtime> FlatteningContext<'prev, 'inst, 'l, 'runtime> {
fn resolve_identifier(&self, identifier : &Identifier) -> LocalOrGlobal {
// Possibly local
if let Some(single_tok_idx) = identifier.span.is_single_token() {
assert!(self.linker.file.tokens.token_types[single_tok_idx] == TOKEN_IDENTIFIER);
assert!(self.linker.file.tokens[single_tok_idx] == TOKEN_IDENTIFIER);
if let Some(decl_id) = self.local_variable_context.get_declaration_for(self.linker.file.get_token_text(single_tok_idx)) {
return LocalOrGlobal::Local(decl_id);
}
10 changes: 5 additions & 5 deletions src/linker.rs
@@ -1,6 +1,6 @@
use std::{collections::{HashMap, HashSet}, rc::Rc, cell::RefCell};

use crate::{arena_alloc::{ArenaAllocator, UUIDMarker, UUID}, ast::{LinkInfo, Module}, errors::{error_info, ErrorCollector}, file_position::Span, flattening::{FlatID, FlattenedModule, Instruction, WireInstance, WireSource}, instantiation::InstantiatedModule, parser::{FullParseResult, TokenTreeNode}, tokenizer::TokenizeResult, typing::{Type, WrittenType}, util::{const_str_position, const_str_position_in_tuples}, value::Value};
use crate::{arena_alloc::{ArenaAllocator, UUIDMarker, UUID}, ast::{LinkInfo, Module}, errors::{error_info, ErrorCollector}, file_position::{FileText, Span}, flattening::{FlatID, FlattenedModule, Instruction, WireInstance, WireSource}, instantiation::InstantiatedModule, parser::{FullParseResult, TokenTreeNode}, tokenizer::TokenTypeIdx, typing::{Type, WrittenType}, util::{const_str_position, const_str_position_in_tuples}, value::Value};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ModuleUUIDMarker;
@@ -118,19 +118,19 @@ impl Linkable for NamedType {
}

pub struct FileData {
pub file_text : String,
pub tokens : TokenizeResult,
pub file_text : FileText,
pub tokens : Vec<TokenTypeIdx>,
pub token_hierarchy : Vec<TokenTreeNode>,
pub parsing_errors : ErrorCollector,
pub associated_values : Vec<NameElem>
}

impl FileData {
pub fn get_token_text(&self, token_idx : usize) -> &str {
&self.file_text[self.tokens.get_token_range(token_idx)]
&self.file_text.file_text[self.file_text.get_token_range(token_idx)]
}
pub fn get_span_text(&self, span : Span) -> &str {
&self.file_text[self.tokens.get_span_range(span)]
&self.file_text.file_text[self.file_text.get_span_range(span)]
}
}

(Diffs for the remaining two changed files were not loaded on this page.)
